# Epi: Enrich fine-type marker gene profiles with Epi NMF programs and, when possible, label fine types with their major NMF programs.

In [None]:
require(Seurat)
require(tidyverse)
require(readxl)
require(patchwork)
require(sf)
require(ggpubr)
require(ggthemes)
require(harmony)
require(presto)
require(ComplexHeatmap)
require(circlize)
require(glue)
require(e1071) 
require(caTools) 
require(class) 
require(gghighlight)
require(tidyverse)
require(Seurat)
require(data.table)
require(lme4)
require(presto)
require(singlecellmethods)
require(future)
require(furrr)
require(gghighlight)
require(readr)
require(clusterProfiler)
options(future.globals.maxSize = 1000 * 1024 ^2)
require(gridExtra)
set.seed(1)
options(repr.plot.res=300)
options(repr.matrix.max.cols=50, repr.matrix.max.rows=10000)

## Load reference Epi cells

In [None]:
# Shared fine-typing helpers; loadLibraries() is presumably defined in the
# sourced file (not visible here).
source("/n/scratch3/users/m/mup728/Cell_Typing_CRC_MERFISH/Fine_typing_with_weighted_KNN/Strom/fine_typing_libraries.r")
loadLibraries()
set.seed(1)

# Reference epithelial scRNA-seq object, read from disk.
scRNA_Epi <- readr::read_rds(
  "/n/scratch3/users/m/mup728/Cell_Typing_CRC_MERFISH/Pelka_reference_cleaning/Epi_fineTyping_selected_genes.rds"
)

# Tag every cell with its technology and copy the cell names into a regular
# metadata column so they can be used as a join key.
scRNA_Epi@meta.data$technology <- "scRNA"
scRNA_Epi@meta.data$combined_cell_names <- colnames(scRNA_Epi)

cellType <- "Epi"

# Feature names carried by the reference object.
selectedGenes <- rownames(scRNA_Epi)



## Healthy & tumor Epi types

In [None]:
# Boxplots of per-sample cell counts for every Epi fine type (ClusterFull),
# comparing healthy against tumor samples.
options(repr.plot.width = 20, repr.plot.height = 10)

scRNA_Epi@meta.data %>%
  # Cells with a missing MMRStatus are labelled "Healthy"; every other cell is
  # labelled "Tumor". Vectorized replacement for the element-wise lapply().
  mutate(MMRStatus = if_else(is.na(MMRStatus), "Healthy", "Tumor")) %>%
  group_by(ClusterFull, MMRStatus, biosample_id) %>%
  # One row per fine type x status x sample; drop the grouping so an ungrouped
  # table is handed to the plotting function.
  summarize(n = n(), .groups = "drop") %>%
  ggboxplot(
    x = "MMRStatus", y = "n",
    color = "MMRStatus", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
    add = "jitter", shape = "MMRStatus"
  ) +
  scale_y_continuous(trans = "log10") +
  # `scales` is spelled out; the original relied on partial matching of `scale`.
  facet_wrap(~ClusterFull, scales = "free_x") +
  stat_compare_means() + # Add pairwise comparisons p-value
  stat_compare_means(aes(label = after_stat(p.signif)), method = "t.test", ref.group = "Healthy")

In [None]:
# Boxplots of per-sample cell counts for every Epi fine type (ClusterFull),
# split by the recorded MMR status, with missing status shown as "Healthy".
options(repr.plot.width = 20, repr.plot.height = 10)

scRNA_Epi@meta.data %>%
  # Replace a missing MMRStatus with "Healthy" and keep recorded values as-is.
  # as.character() matters when MMRStatus is stored as a factor: the original
  # unlist(lapply(...)) then mixed factor and character elements, and unlist()
  # falls back to the factors' integer codes instead of their labels.
  mutate(MMRStatus = coalesce(as.character(MMRStatus), "Healthy")) %>%
  group_by(ClusterFull, MMRStatus, biosample_id) %>%
  # One row per fine type x status x sample; drop the grouping so an ungrouped
  # table is handed to the plotting function.
  summarize(n = n(), .groups = "drop") %>%
  ggboxplot(
    x = "MMRStatus", y = "n",
    color = "MMRStatus", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
    add = "jitter", shape = "MMRStatus"
  ) +
  scale_y_continuous(trans = "log10") +
  # `scales` is spelled out; the original relied on partial matching of `scale`.
  facet_wrap(~ClusterFull, scales = "free_x") +
  # stat_compare_means() + # Add pairwise comparisons p-value (disabled)
  stat_compare_means(aes(label = after_stat(p.signif)), method = "t.test", ref.group = "Healthy")

## Load coarse typed objects

In [None]:
# Directory holding the coarse-typed Seurat objects (one .rds file per sample).
coarseTypedDir <- "/n/scratch3/users/m/mup728/Pelka_Baysor_segmentation/coarse_typing_with_weighted_knn/Coarse_typing_with_weighted_knn/MSI/"

# List the directory once and let list.files() do the filtering. The dot before
# "rds" is escaped so it matches a literal "." rather than any character.
coarseTypedSeurats <- list.files(coarseTypedDir, pattern = "coarse.*\\.rds")

# Strip the "coarse_" prefix and the ".rds" extension to obtain the sample IDs.
# sample() without a size returns a random permutation, so the order of the IDs
# depends on the current RNG state.
sampleIDs <- sample(gsub(x = coarseTypedSeurats, pattern = "coarse_|\\.rds", replacement = ""))

In [None]:
# Display the sample IDs.
sampleIDs

## Visualize on UMAP

In [None]:
# Print the reference object to inspect it.
scRNA_Epi

In [None]:
# Run UMAP on the first 15 Harmony dimensions. ret_extra = "fgraph" makes the
# result a list carrying both the embedding and the fuzzy graph.
U <- uwot::umap(
  Embeddings(scRNA_Epi, reduction = "harmony")[, 1:15],
  min_dist = 0.05,
  spread = 0.30,
  ret_extra = "fgraph",
  n_sgd_threads = nbrOfWorkers(),
  fast_sgd = TRUE
)

# Label the two embedding axes and name both graph dimensions by cell.
colnames(U$embedding) <- c("HUMAP1", "HUMAP2")
colnames(U$fgraph) <- Cells(scRNA_Epi)
rownames(U$fgraph) <- Cells(scRNA_Epi)

# Store the embedding on the Seurat object as the "humap" reduction.
scRNA_Epi[["humap"]] <- Seurat::CreateDimReducObject(
  embeddings = U$embedding,
  assay = "RNA",
  key = "HUMAP_",
  global = TRUE
)

# Store the fuzzy graph next to it, tied to the object's default assay.
new_graph <- Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) <- DefaultAssay(scRNA_Epi)
scRNA_Epi[["humap_fgraph"]] <- new_graph

In [None]:
# Count the cells carrying each ClusterFull label.
scRNA_Epi@meta.data$ClusterFull %>% table()

In [None]:
# Build a plotting table: UMAP coordinates joined to per-cell annotations.
temp <- Embeddings(scRNA_Epi, "humap") %>% as.data.frame()
temp$combined_cell_names <- rownames(temp)

# The join key is stated explicitly (it is the only column the two tables
# share), and select() is namespaced so that a same-named function from another
# attached package cannot mask it.
humap_embeddings <- right_join(
  scRNA_Epi@meta.data %>%
    dplyr::select(combined_cell_names, ClusterTop, ClusterFull, biosample_id),
  temp,
  by = "combined_cell_names"
)

options(repr.plot.width = 10, repr.plot.height = 10)

# Scatter of all reference cells coloured by fine type. alpha must lie in
# [0, 1]; the original value of 2 was out of range, so fully opaque (1) is used.
ggplot(humap_embeddings, aes(x = HUMAP_1, y = HUMAP_2, color = ClusterFull)) +
  geom_point(shape = ".", alpha = 1) +
  ggtitle("Reference UMAP - Epi cells") +
  # Enlarge the legend keys; the "." plotting symbol is too small to read.
  guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "top") +
  scale_color_tableau("Tableau 20")

In [None]:
# Same reference UMAP, one panel per fine type; gghighlight() is added last so
# it applies to the point layer defined above it.
ggplot(
  data = humap_embeddings,
  mapping = aes(x = HUMAP_1, y = HUMAP_2, color = ClusterFull)
) +
  geom_point(shape = ".", alpha = 1) +
  ggtitle("Reference UMAP - Epi cells") +
  # Enlarge the legend keys; the "." plotting symbol is too small to read.
  guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "top") +
  facet_wrap(~ClusterFull) +
  scale_color_tableau("Tableau 20") +
  gghighlight()

## Get NMF programs

In [None]:
# Read the "D. Program top genes" sheet of the supplementary table.
all_nmf_programs <- read_excel(
  "/n/scratch3/users/m/mup728/Cell_Typing_CRC_MERFISH/Fine_typing_with_weighted_KNN/Table_S2_Pelka_atlas.xlsx",
  sheet = "D. Program top genes"
)

# Keep only the columns whose name contains "pM".
# NOTE(review): the result is called Epi_nmf_programs, but "pM" does not look
# like an epithelial prefix -- confirm against the sheet's column names that
# this pattern selects the intended (epithelial) programs.
Epi_nmf_programs <- all_nmf_programs[
  ,
  grep(pattern = "pM", x = colnames(all_nmf_programs), value = TRUE)
]

Epi_nmf_programs

In [None]:
# List the names of the selected NMF program columns.
colnames(Epi_nmf_programs)

### Find markers for Epi fine types

In [None]:
require(presto)

# Differential expression across groups of the reference object.
# NOTE(review): no grouping argument is passed, so this presumably uses the
# object's active identities -- confirm these are the Epi fine types.
wilcox_Epi <- wilcoxauc(scRNA_Epi)

# Keep every marker (n = Inf) passing the AUC and adjusted p-value cut-offs,
# then drop the first column of the result (presumably the rank index).
topMarkers_Epi <- top_markers(
  wilcox_Epi,
  n = Inf,
  auc_min = 0.55,
  padj_max = 0.05
)[, -1]

topMarkers_Epi

### Enrich NMF programs in subcluster markers using clusterProfiler::enricher (despite the name, this is an implementation of the hypergeometric test)

In [None]:
# Reshape the program table (one column per program) into a long two-column
# table: `term` holds the program name, `gene` one of its genes.
TERM2GENE <- pivot_longer(
  Epi_nmf_programs,
  cols = everything(),
  names_to = "term",
  values_to = "gene"
)
head(TERM2GENE)

In [None]:
# For each marker column (one per fine type), test its non-missing genes for
# enrichment of the NMF programs, then stack the per-type result tables with
# the fine type recorded in `epi_fine_type`.
# NOTE(review): no `universe` is passed to enricher(), so its default
# background applies -- confirm that is appropriate for this gene panel.
enrichmentList <- setNames(nm = colnames(topMarkers_Epi)) %>%
  map(function(fine_type) {
    marker_genes <- na.omit(as.vector(as.matrix(topMarkers_Epi[, fine_type])))
    as.data.frame(enricher(marker_genes, TERM2GENE = TERM2GENE))
  }) %>%
  bind_rows(.id = "epi_fine_type") %>%
  as_tibble()

enrichmentList

### Relabel subclusters with top NMF programs

In [None]:
# For every fine type keep the enrichment row(s) with the smallest adjusted
# p-value. slice_min() replaces the superseded top_n(); with_ties = TRUE keeps
# tied rows, as top_n() did. The result is ungrouped so no grouping leaks into
# later steps.
enrichmentList %>%
  group_by(epi_fine_type) %>%
  slice_min(order_by = p.adjust, n = 1, with_ties = TRUE) %>%
  ungroup()

In [None]:
# Cell counts per ClusterFull label, converted to a matrix for display.
scRNA_Epi@meta.data$ClusterFull %>% table() %>% as.matrix()

### Comment:

Some fine types are not enriched for (or defined by) any specific NMF program.

## De novo cluster Epi with Louvain and repeat NMF program enrichment