# Strom cells: process reference for integration with MERFISH

In [None]:
require(Seurat)
require(tidyverse)
require(readxl)
require(patchwork)
require(sf)
require(ggpubr)
require(ggthemes)
require(harmony)
require(presto)
require(ComplexHeatmap)
require(circlize)
require(glue)
require(e1071) 
require(caTools) 
require(class) 
require(gghighlight)
require(UpSetR)
options(repr.matrix.max.cols=100, repr.matrix.max.rows=100, repr.plot.res=300)
set.seed(1)

# load processed scRNA reference dataset

In [None]:
completeReference = readr::read_rds('/n/scratch/users/m/mup728/mup728//Cell_Typing_CRC_MERFISH/Pelka_reference_cleaning/pelka_dataset_with_merfish_genes.rds')

In [None]:
completeReference@meta.data$technology = 'scRNA'
completeReference@meta.data$biosample_id = completeReference@meta.data$batchID

In [None]:
completeReference@meta.data$combined_cell_names = colnames(completeReference)
length(unique(completeReference@meta.data$combined_cell_names))

In [None]:
summary(completeReference@meta.data$nCount_RNA)
summary(completeReference@meta.data$nFeature_RNA)

## select Strom cells for finetyping

In [None]:
# Build a Seurat object holding only the cells whose ClusterTop is 'Strom'.
# The metadata rows are keyed by cell name so CreateSeuratObject can pair them
# with the matching columns of the count matrix.
stromMeta = completeReference@meta.data %>%
    filter(ClusterTop == 'Strom')
rownames(stromMeta) = stromMeta$combined_cell_names
stromCounts = GetAssayData(completeReference, 'counts')[, stromMeta$combined_cell_names]
scRNA_Strom = CreateSeuratObject(stromCounts, meta.data = stromMeta)
scRNA_Strom
rm(stromMeta, stromCounts)

In [None]:
table(scRNA_Strom@meta.data$orig.ident, scRNA_Strom@meta.data$ClusterFull) %>% as.matrix()

In [None]:
table(scRNA_Strom@meta.data$orig.ident) %>% as.matrix()

In [None]:
# Histogram and numeric summary of the number of Strom cells per donor (orig.ident).
options(repr.plot.width=5, repr.plot.height=5)
cellsPerDonor = as.data.frame(table(scRNA_Strom@meta.data$orig.ident))
ggplot(cellsPerDonor) +
    geom_histogram(aes(Freq))
summary(cellsPerDonor$Freq)
rm(cellsPerDonor)

In [None]:
# Per-cell nFeature_RNA vs nCount_RNA on log10 axes, one facet per ClusterFull.
# Every cluster is mapped to the same colour ('red') so that, combined with
# gghighlight(), each facet emphasises its own cluster. The blue lines mark
# nFeature_RNA = 15 and nCount_RNA = 50.
options(repr.plot.width=20, repr.plot.height=10)
stromTypes = unique(scRNA_Strom@meta.data$ClusterFull)
constantPalette = setNames(rep('red', length(stromTypes)), stromTypes)
ggplot(scRNA_Strom@meta.data) +
    geom_point(aes(x = nFeature_RNA, y = nCount_RNA, color = ClusterFull),
               shape = '.', alpha = 0.5) +
    geom_vline(xintercept = 15, color = 'blue') +
    geom_hline(yintercept = 50, color = 'blue') +
    scale_x_continuous(trans = 'log10') +
    scale_y_continuous(trans = 'log10') +
    facet_wrap(~ClusterFull) +
    gghighlight() +
    scale_color_manual(values = constantPalette) +
    theme_minimal(base_size = 15) +
    theme(legend.position = "none")

#### filter out donors with <= 100 Strom cells (retain donors with > 100)

In [None]:
# Donors kept for fine typing: those contributing more than 100 Strom cells.
stromCellsPerDonor = table(scRNA_Strom@meta.data$orig.ident)
donorsToRetain = names(stromCellsPerDonor)[as.vector(stromCellsPerDonor > 100)]
# Number of donors before and after the filter.
length(unique(scRNA_Strom@meta.data$orig.ident))
length(donorsToRetain)

#### filter out cell types with <= 50 cells (retain types with > 50)

In [None]:
# Fine types kept: ClusterFull labels with more than 50 Strom cells.
cellsPerFineType = table(scRNA_Strom@meta.data$ClusterFull)
typesToRetain = names(cellsPerFineType)[as.vector(cellsPerFineType > 50)]
# Number of fine types before and after the filter.
length(unique(scRNA_Strom@meta.data$ClusterFull))
length(typesToRetain)

In [None]:
# Keep only cells that come from a retained donor AND carry a retained fine type.
# Both retention lists were computed on the unfiltered object, so a fine type can
# end up with fewer cells than its threshold once the donor filter is applied too.
scRNA_Strom = subset(scRNA_Strom, subset = orig.ident %in% donorsToRetain & ClusterFull %in% typesToRetain)
scRNA_Strom

## scale/normalize/pca

In [None]:
# Scale factor for LogNormalize: the median nCount_RNA within each technology,
# then the mean of those per-technology medians.
medianCountsByTech = tapply(scRNA_Strom@meta.data$nCount_RNA,
                            scRNA_Strom@meta.data$technology,
                            median)
normFactor = mean(medianCountsByTech)
normFactor
# Log-normalise, scale all genes, then run singlecellmethods' balanced PCA
# (weight.by = 'orig.ident', 15 components).
scRNA_Strom = scRNA_Strom %>%
    NormalizeData(normalization.method = "LogNormalize",
                  scale.factor = normFactor) %>%
    ScaleData(features = rownames(scRNA_Strom)) %>%
    singlecellmethods::RunBalancedPCA(weight.by = 'orig.ident', npcs = 15)
scRNA_Strom

### In scRNA, are the Strom cell subtypes distinguishable?

#### qualitative look - umap

In [None]:
# UMAP on the first 15 (pre-Harmony) PCs. ret_extra = 'fgraph' also returns
# uwot's fuzzy graph, which is stored on the object as a Seurat Graph so it can
# be used for clustering later.
# nbrOfWorkers() belongs to the 'future' package, which none of the library
# calls visible above this cell attach, so it is called through its namespace.
U = uwot::umap(scRNA_Strom@reductions$pca@cell.embeddings[, 1:15],
               min_dist = 0.05,
               spread = 0.30,
               ret_extra = 'fgraph',
               n_sgd_threads = future::nbrOfWorkers(),
               fast_sgd = TRUE)
colnames(U$embedding) = c('PCAUMAP1', 'PCAUMAP2')
# Label the graph's rows/columns with the cell names.
rownames(U$fgraph) = colnames(U$fgraph) = Cells(scRNA_Strom)
scRNA_Strom[['pcaumap']] = Seurat::CreateDimReducObject(
    embeddings = U$embedding,
    assay = 'RNA',
    key = 'PCAUMAP_',
    global = TRUE
)
new_graph = Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) = DefaultAssay(scRNA_Strom)
scRNA_Strom[['pcaumap_fgraph']] = new_graph

In [None]:
# Attach cell-level labels to the pre-Harmony UMAP coordinates.
# The join key is given explicitly so the join cannot silently pick up any
# other column the two tables might come to share.
temp = Embeddings(scRNA_Strom, 'pcaumap') %>% as.data.frame()
temp$combined_cell_names = rownames(temp)
pcaumap_embeddings = right_join(
    scRNA_Strom@meta.data %>% select(combined_cell_names, ClusterTop, ClusterFull, orig.ident),
    temp,
    by = 'combined_cell_names')
pcaumap_embeddings %>% head()
pcaumap_embeddings %>% tail()

In [None]:
# Pre-Harmony UMAP, cells coloured by donor (orig.ident); legend hidden.
options(repr.plot.width = 5, repr.plot.height = 5)
ggplot(data = pcaumap_embeddings,
       mapping = aes(x = PCAUMAP_1, y = PCAUMAP_2, color = orig.ident)) +
    geom_point(shape = '.', alpha = 0.5) +
    labs(title = 'Pre-harmony UMAP - Strom cells') +
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'none')

In [None]:
# Pre-Harmony UMAP with one facet per donor; all donors share the colour 'red'
# and gghighlight() is applied after faceting.
options(repr.plot.width = 20, repr.plot.height = 20)
donorIds = unique(pcaumap_embeddings$orig.ident)
constantPalette = setNames(rep('red', length(donorIds)), donorIds)
ggplot(data = pcaumap_embeddings,
       mapping = aes(x = PCAUMAP_1, y = PCAUMAP_2, color = orig.ident)) +
    geom_point(shape = '.', alpha = 0.5) +
    labs(title = 'Pre-harmony UMAP - Strom cells') +
    scale_color_manual(values = constantPalette) +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'none') +
    facet_wrap(~orig.ident) +
    gghighlight::gghighlight()

In [None]:
# Pre-Harmony UMAP, cells coloured by Pelka fine type (ClusterFull); legend on top.
options(repr.plot.width = 10, repr.plot.height = 10)
ggplot(data = pcaumap_embeddings,
       mapping = aes(x = PCAUMAP_1, y = PCAUMAP_2, color = ClusterFull)) +
    geom_point(shape = '.', alpha = 0.5) +
    labs(title = 'Pre-harmony UMAP - Strom cells') +
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'top')

In [None]:
# Pre-Harmony UMAP with one facet per Pelka fine type, gghighlight() applied per facet.
options(repr.plot.width = 10, repr.plot.height = 10)
ggplot(data = pcaumap_embeddings,
       mapping = aes(x = PCAUMAP_1, y = PCAUMAP_2, color = ClusterFull)) +
    geom_point(shape = '.', alpha = 0.5) +
    labs(title = 'Pre-harmony UMAP - Strom cells') +
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'top') +
    facet_wrap(~ClusterFull) +
    gghighlight::gghighlight()

### harmonize over donor

In [None]:
# Harmony integration over donor (orig.ident), with the convergence plot shown.
# early_stop is spelled FALSE rather than F, because F is an ordinary variable
# that can be reassigned.
options(repr.plot.width = 5, repr.plot.height = 5)
scRNA_Strom = scRNA_Strom %>% harmony::RunHarmony("orig.ident",
    plot_convergence = TRUE,
    lambda = NULL,
    max.iter = 10,
    early_stop = FALSE
)

### post harmony umap

In [None]:
# UMAP on the first 15 Harmony dimensions. ret_extra = 'fgraph' also returns
# uwot's fuzzy graph, stored below as the Seurat Graph 'humap_fgraph' that the
# later FindClusters / FindSubCluster calls use.
# nbrOfWorkers() belongs to the 'future' package, which none of the library
# calls visible above this cell attach, so it is called through its namespace.
U = uwot::umap(scRNA_Strom@reductions$harmony@cell.embeddings[, 1:15],
               min_dist = 0.05,
               spread = 0.30,
               ret_extra = 'fgraph',
               n_sgd_threads = future::nbrOfWorkers(),
               fast_sgd = TRUE)
colnames(U$embedding) = c('HUMAP1', 'HUMAP2')
# Label the graph's rows/columns with the cell names.
rownames(U$fgraph) = colnames(U$fgraph) = Cells(scRNA_Strom)
scRNA_Strom[['humap']] = Seurat::CreateDimReducObject(
    embeddings = U$embedding,
    assay = 'RNA',
    key = 'HUMAP_',
    global = TRUE
)
new_graph = Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) = DefaultAssay(scRNA_Strom)
scRNA_Strom[['humap_fgraph']] = new_graph

In [None]:
# Attach cell-level labels to the post-Harmony UMAP coordinates.
# The join key is given explicitly so the join cannot silently pick up any
# other column the two tables might come to share.
temp = Embeddings(scRNA_Strom, 'humap') %>% as.data.frame()
temp$combined_cell_names = rownames(temp)
humap_embeddings = right_join(
    scRNA_Strom@meta.data %>% select(combined_cell_names, ClusterTop, ClusterFull, orig.ident),
    temp,
    by = 'combined_cell_names')
humap_embeddings %>% head()
humap_embeddings %>% tail()

In [None]:
# Post-Harmony UMAP, cells coloured by donor (orig.ident); legend hidden.
options(repr.plot.width = 5, repr.plot.height = 5)
ggplot(data = humap_embeddings,
       mapping = aes(x = HUMAP_1, y = HUMAP_2, color = orig.ident)) +
    geom_point(shape = '.', alpha = 0.5) +
    labs(title = 'Post-harmony UMAP - Strom cells') +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'none')

In [None]:
# Post-Harmony UMAP with one facet per donor; all donors share the colour 'red'
# and gghighlight() is applied after faceting.
options(repr.plot.width = 20, repr.plot.height = 20)
donorIds = unique(humap_embeddings$orig.ident)
constantPalette = setNames(rep('red', length(donorIds)), donorIds)
ggplot(data = humap_embeddings,
       mapping = aes(x = HUMAP_1, y = HUMAP_2, color = orig.ident)) +
    geom_point(shape = '.', alpha = 0.5) +
    labs(title = 'Post-harmony UMAP - Strom cells') +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'none') +
    scale_color_manual(values = constantPalette) +
    facet_wrap(~orig.ident) +
    gghighlight::gghighlight()

In [None]:
# Post-Harmony UMAP, cells coloured by Pelka fine type (ClusterFull); legend on top.
options(repr.plot.width = 10, repr.plot.height = 10)
ggplot(data = humap_embeddings,
       mapping = aes(x = HUMAP_1, y = HUMAP_2, color = ClusterFull)) +
    geom_point(shape = '.', alpha = 0.5) +
    labs(title = 'Post-harmony UMAP - Strom cells') +
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'top')

In [None]:
# Post-Harmony UMAP with one facet per Pelka fine type, gghighlight() applied per facet.
options(repr.plot.width = 15, repr.plot.height = 15)
ggplot(data = humap_embeddings,
       mapping = aes(x = HUMAP_1, y = HUMAP_2, color = ClusterFull)) +
    geom_point(shape = '.', alpha = 0.5) +
    labs(title = 'Post-harmony UMAP - Strom cells') +
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'top') +
    facet_wrap(~ClusterFull) +
    gghighlight::gghighlight()

## DE genes for clusterfull Strom cells

In [None]:
# Marker table for the Pelka fine types: make ClusterFull the active identity,
# run presto's wilcoxauc on the object, and list the top markers using
# padj = 0.05 and auc_min = 0.55 with no cap on the number per group.
Idents(scRNA_Strom) = 'ClusterFull'
require(presto)
scRNA_Strom %>%
    wilcoxauc() %>%
    top_markers(n = Inf, padj = 0.05, auc_min = 0.55)

## Cluster at a few resolutions to remove non Strom cells

In [None]:
# Cluster on the Harmony-UMAP fuzzy graph at resolutions 0.1, 0.2, ..., 1.5.
set.seed(1)
resolutions_test = seq(from = 0.1, to = 1.5, by = 0.1)
resolutions_test
scRNA_Strom = Seurat::FindClusters(
    scRNA_Strom,
    graph.name = 'humap_fgraph',
    resolution = resolutions_test,
    verbose = TRUE
)
scRNA_Strom

### stash the results that you used to filter out non Strom cells

In [None]:
# Keep the resolution-1.5 clustering under a stable name (unused levels dropped).
scRNA_Strom@meta.data[['Louvain_for_filtering']] = droplevels(scRNA_Strom@meta.data[['humap_fgraph_res.1.5']])
scRNA_Strom@meta.data[['Louvain_for_filtering']]
scRNA_Strom

In [None]:
readr::write_rds(scRNA_Strom, 'Strom_fineTyping_all_genes.rds')

## DE with wilcox to figure out which of the ~30 clusters are non Strom cells

In [None]:
# Marker table for the over-clustered Louvain labels: make Louvain_for_filtering
# the active identity, run presto's wilcoxauc, and list the top markers using
# padj = 0.05 and auc_min = 0.6 with no cap on the number per group.
Idents(scRNA_Strom) = 'Louvain_for_filtering'
require(presto)
scRNA_Strom %>%
    wilcoxauc() %>%
    top_markers(n = Inf, padj = 0.05, auc_min = 0.6)

In [None]:
# Harmony-UMAP coordinates with the filtering clusters and donor attached.
# The join key is given explicitly instead of relying on a natural join.
humap_embeddings = Embeddings(scRNA_Strom, 'humap') %>%
    as.data.frame() %>%
    mutate(combined_cell_names = rownames(.)) %>%
    left_join(scRNA_Strom@meta.data %>%
                  select(combined_cell_names, Louvain_for_filtering, orig.ident),
              by = 'combined_cell_names')
rbind(humap_embeddings %>% head(), humap_embeddings %>% tail())

In [None]:
# Harmony UMAP with one facet per filtering cluster; all clusters share the
# colour 'red' and gghighlight() is applied after faceting.
options(repr.plot.width = 20, repr.plot.height = 20)
filterClusters = unique(humap_embeddings$Louvain_for_filtering)
constantPalette = setNames(rep('red', length(filterClusters)), filterClusters)
ggplot(data = humap_embeddings,
       mapping = aes(x = HUMAP_1, y = HUMAP_2, color = Louvain_for_filtering)) +
    geom_point(shape = '.', alpha = 1) +
    labs(title = 'For filtering non Strom cells:\nOver-clustering of Strom cells UMAP') +
    ggpubr::theme_pubr(base_size = 20) +
    theme(legend.position = 'none') +
    scale_color_manual(values = constantPalette) +
    facet_wrap(~Louvain_for_filtering) +
    gghighlight::gghighlight()

In [None]:
# Reload the object written to 'Strom_fineTyping_all_genes.rds' earlier in this
# notebook; this replaces the in-memory scRNA_Strom with the saved copy.
scRNA_Strom = readr::read_rds('Strom_fineTyping_all_genes.rds')

## stash clusters used for mapping fine cell types to clean louvain clusters

In [None]:
# Cluster the Harmony-UMAP fuzzy graph at resolution 4.
scRNA_Strom = Seurat::FindClusters(
    scRNA_Strom,
    graph.name = 'humap_fgraph',
    resolution = 4,
    verbose = TRUE
)
scRNA_Strom

In [None]:
levels(scRNA_Strom@meta.data$humap_fgraph_res.4)
length(unique(scRNA_Strom@meta.data$ClusterFull))

In [None]:
# Keep the resolution-4 clustering under a stable name (unused levels dropped).
scRNA_Strom@meta.data[['Louvain_for_fine_types']] = droplevels(scRNA_Strom@meta.data[['humap_fgraph_res.4']])
scRNA_Strom@meta.data[['Louvain_for_fine_types']]

## concordance of new clusters with pelka fine types

In [None]:
# Concordance heatmap: Pelka fine types (rows) against the resolution-4 Louvain
# clusters (columns). Each row is turned into proportions across clusters and
# the matrix is then passed through scale(). Rows and columns are ordered by
# Ward.D hierarchical clustering on Euclidean distances.
options(repr.plot.width = 35, repr.plot.height = 15)
wardOrder = function(m) hclust(dist(m, method = "euclidean"), method = "ward.D")$order
oldClustersInNew3 = scale(
    prop.table(
        t(table(scRNA_Strom@meta.data$Louvain_for_fine_types,
                scRNA_Strom@meta.data$ClusterFull)),
        1))
rowOrder = wardOrder(oldClustersInNew3)
colOrder = wardOrder(t(oldClustersInNew3))
as.data.frame(oldClustersInNew3[rowOrder, colOrder]) %>%
    rename('Fine Type' = Var1,
           'Louvain Cluster' = Var2,
           'Scaled\nProportion' = Freq) %>%
    ggplot(aes(y = `Fine Type`, x = `Louvain Cluster`)) +
    geom_tile(aes(fill = `Scaled\nProportion`), alpha = 1, color = 'black') +
    geom_label(aes(label = round(.data$`Scaled\nProportion`, 1)), color = 'red') +
    scale_fill_viridis_c(direction = -1) +
    theme_minimal(base_size = 16) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    ggtitle('Louvain to map to fine types - step 1\nIdentify mixed clusters')

## Merge clusters

In [None]:
levels(as.factor(scRNA_Strom@meta.data$ClusterFull)) %>% as.data.frame()

In [None]:
# Collapse groups of Pelka fine types (ClusterFull) into merged labels, stored
# in the new metadata column mergedClusters. fct_recode takes
# "new label" = "old level" pairs; old levels not listed below keep their name.
# The final line tabulates the number of cells per merged label.
scRNA_Strom@meta.data$ClusterFull = as.factor(scRNA_Strom@meta.data$ClusterFull)
scRNA_Strom@meta.data$mergedClusters = fct_recode(
    scRNA_Strom@meta.data$ClusterFull,
    # fibro stem cell niche
    "cS21/cS22 (Fibro stem cell niche)" = "cS21 (Fibro stem cell niche)",
    "cS21/cS22 (Fibro stem cell niche)" = "cS22 (Fibro stem cell niche)",
    # endo capillary
    "cS02/cS03/cS04/cS07 (Endo capillary)" = "cS02 (Endo capillary)",
    "cS02/cS03/cS04/cS07 (Endo capillary)" = "cS03 (Endo capillary)",
    "cS02/cS03/cS04/cS07 (Endo capillary)" = "cS04 (Endo)",
    "cS02/cS03/cS04/cS07 (Endo capillary)" = "cS07 (Endo capillary-like)",
    # endo (I think these are capillary like)
    "cS09/12/14 Endo" = "cS09 (Endo)",
    "cS09/12/14 Endo" = "cS12 (Endo)",
    "cS09/12/14 Endo" = "cS14 (Endo)",
    # Fibro BMP producing
    "cS23/cS24 (Fibro BMP-producing)" = "cS23 (Fibro BMP-producing)",
    "cS23/cS24 (Fibro BMP-producing)" = "cS24 (Fibro BMP-producing)",
    # CCL8 Fibro like
    "cS25/cS30 (Fibro CCL8+)" = "cS30 (CAF CCL8 Fibro-like)",
    "cS25/cS30 (Fibro CCL8+)" = "cS25 (Fibro CCL8+)",
    # pericyte
    "cS15/16/17/18/19 (Pericyte)" = "cS15 (Pericyte)",
    "cS15/16/17/18/19 (Pericyte)" = "cS16 (Pericyte)",
    "cS15/16/17/18/19 (Pericyte)" = "cS17 (Pericyte)",
    "cS15/16/17/18/19 (Pericyte)" = "cS18 (Pericyte)",
    "cS15/16/17/18/19 (Pericyte)" = "cS19 (Pericyte)",
    # endo venous
    "cS05/S13 (Endo venous)" = "cS05 (Endo venous)",
    "cS05/S13 (Endo venous)" = "cS13 (Endo venous-like)",
    # endo arterial
    "cS01/S08 (Endo arterial)" = "cS01 (Endo arterial)",
    "cS01/S08 (Endo arterial)" = "cS08 (Endo arterial-like)"
)
table(scRNA_Strom@meta.data$mergedClusters) %>% as.data.frame()

In [None]:
# Concordance heatmap: merged Pelka types (rows) against the resolution-4
# Louvain clusters (columns). Each row is turned into proportions across
# clusters and the matrix is then passed through scale(). Only the rows are
# re-ordered (Ward.D on Euclidean distances); columns keep their table order.
options(repr.plot.width = 30, repr.plot.height = 15)
oldClustersInNew3 = scale(
    prop.table(
        t(table(scRNA_Strom@meta.data$Louvain_for_fine_types,
                scRNA_Strom@meta.data$mergedClusters)),
        1))
rowOrder = hclust(dist(oldClustersInNew3, method = "euclidean"),
                  method = "ward.D")$order
as.data.frame(oldClustersInNew3[rowOrder, ]) %>%
    rename('Fine Type' = Var1,
           'Louvain Cluster' = Var2,
           'Scaled\nProportion' = Freq) %>%
    ggplot(aes(y = `Fine Type`, x = `Louvain Cluster`)) +
    geom_tile(aes(fill = `Scaled\nProportion`), alpha = 1, color = 'black') +
    geom_label(aes(label = round(.data$`Scaled\nProportion`, 1)), color = 'red') +
    scale_fill_viridis_c(direction = -1) +
    theme_minimal(base_size = 16) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    ggtitle('Louvain to map to fine types - step 1\nIdentify mixed clusters')

## Subcluster mixed Louvain clusters

In [None]:
# Iteratively split the mixed Louvain clusters listed in clustersToSplit.
# Each pass sub-clusters one cluster of the *current* identities and then makes
# the resulting labelling the active identity, so the column written for the
# last cluster carries the splits of every cluster processed before it.
clustersToSplit = c(1, 2, 5, 7, 8, 14, 16, 17, 20, 23, 24, 27, 29, 35, 40, 41)
Idents(scRNA_Strom) = 'Louvain_for_fine_types'
for (i in clustersToSplit) {
    subclusterColumn = paste0('subcluster_', i)
    scRNA_Strom = FindSubCluster(
      scRNA_Strom,
      i,
      graph.name = 'humap_fgraph',
      subcluster.name = subclusterColumn,
      resolution = 0.1,
      algorithm = 1
    )
    scRNA_Strom = SetIdent(scRNA_Strom, value = subclusterColumn)
}
# Take the final labelling from the column of the last cluster processed. It is
# named explicitly instead of through the loop index left behind by the loop,
# which would point at the wrong column if `i` were changed before a re-run.
finalSubclusterColumn = paste0('subcluster_', tail(clustersToSplit, 1))
scRNA_Strom@meta.data$`subclustered_Louvain_for_fine_types` = scRNA_Strom@meta.data[, finalSubclusterColumn]
scRNA_Strom = SetIdent(scRNA_Strom, value = "subclustered_Louvain_for_fine_types")

## stash subclustered Louvain clusters

In [None]:
# Store the sub-clustered labels as a factor, show its levels, and make it the
# active identity.
scRNA_Strom@meta.data[['subclustered_Louvain_for_fine_types']] = as.factor(
    scRNA_Strom@meta.data[['subclustered_Louvain_for_fine_types']])
levels(scRNA_Strom@meta.data[['subclustered_Louvain_for_fine_types']])
scRNA_Strom = SetIdent(scRNA_Strom, value = 'subclustered_Louvain_for_fine_types')

## plot concordance

In [None]:
# Concordance heatmap: merged Pelka types (rows) against the sub-clustered
# Louvain labels (columns). Each row is turned into proportions across clusters
# and the matrix is then passed through scale(). Rows and columns are ordered
# by Ward.D hierarchical clustering on Euclidean distances.
options(repr.plot.width = 60, repr.plot.height = 15)
wardOrder = function(m) hclust(dist(m, method = "euclidean"), method = "ward.D")$order
oldClustersInNew3 = scale(
    prop.table(
        t(table(scRNA_Strom@meta.data$subclustered_Louvain_for_fine_types,
                scRNA_Strom@meta.data$mergedClusters)),
        1))
rowOrder = wardOrder(oldClustersInNew3)
colOrder = wardOrder(t(oldClustersInNew3))
as.data.frame(oldClustersInNew3[rowOrder, colOrder]) %>%
    rename('Fine Type' = Var1,
           'Louvain Cluster' = Var2,
           'Scaled\nProportion' = Freq) %>%
    ggplot(aes(y = `Fine Type`, x = `Louvain Cluster`)) +
    geom_tile(aes(fill = `Scaled\nProportion`), alpha = 1, color = 'black') +
    geom_label(aes(label = round(.data$`Scaled\nProportion`, 1)), color = 'red') +
    scale_fill_viridis_c(direction = -1) +
    theme_minimal(base_size = 16) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    ggtitle('Louvain to map to fine types - step 2\nMap subclustered Louvain clusters back to Pelka fine types')

### plot umap of louvain subclusters

In [None]:
# Harmony-UMAP coordinates with the sub-clustered Louvain label attached
# (join key given explicitly instead of relying on a natural join), followed by
# a UMAP with one facet per sub-cluster. All sub-clusters share the colour
# 'red' and gghighlight() is applied after faceting.
options(repr.plot.width = 20, repr.plot.height = 20)
humap_embeddings = Embeddings(scRNA_Strom, 'humap') %>%
    as.data.frame() %>%
    mutate(combined_cell_names = rownames(.)) %>%
    left_join(scRNA_Strom@meta.data %>%
                  select(combined_cell_names, subclustered_Louvain_for_fine_types),
              by = 'combined_cell_names')
rbind(humap_embeddings %>% head(), humap_embeddings %>% tail())
options(repr.plot.width = 20, repr.plot.height = 20)
constantPalette = rep('red', length(unique(humap_embeddings$subclustered_Louvain_for_fine_types)))
names(constantPalette) = unique(humap_embeddings$subclustered_Louvain_for_fine_types)
ggplot(humap_embeddings, aes(x = HUMAP_1, y = HUMAP_2, color = subclustered_Louvain_for_fine_types)) +
    geom_point(shape = '.', alpha = 0.5) +
    ggtitle('Louvain to map to fine types - step 2\nMap subclustered Louvain clusters back to Pelka fine types') +
    ggpubr::theme_pubr(base_size=20) +
    theme(legend.position = 'none') +
    scale_color_manual(values = constantPalette) +
    facet_wrap(~subclustered_Louvain_for_fine_types) +
    gghighlight::gghighlight()

## Relabel and merge clusters

### Here are the new labels:

In [None]:
# For every sub-clustered Louvain label (column), pick the merged Pelka type
# (row) with the largest scaled proportion. The result is a character vector in
# the form fct_recode expects: names are the new (Pelka) labels, values are the
# Louvain sub-cluster labels they replace.
oldClustersInNew3 = scale(
    prop.table(
        t(table(scRNA_Strom@meta.data$subclustered_Louvain_for_fine_types,
                scRNA_Strom@meta.data$mergedClusters)),
        1))
bestTypeIndex = apply(oldClustersInNew3, 2, which.max)
mergedClusters = names(bestTypeIndex)
names(mergedClusters) = rownames(oldClustersInNew3)[bestTypeIndex]
mergedClusters

In [None]:
# Relabel each sub-clustered Louvain label with its mapped Pelka type (the
# named vector mergedClusters is spliced into fct_recode), drop unused levels,
# and make the result the active identity.
recodedTypes = fct_recode(
    scRNA_Strom@meta.data$subclustered_Louvain_for_fine_types,
    !!! mergedClusters)
scRNA_Strom@meta.data$cleaned_fine_types = droplevels(recodedTypes)
rm(recodedTypes)
scRNA_Strom = SetIdent(scRNA_Strom, value = 'cleaned_fine_types')

In [None]:
# Side-by-side cell counts per label: cleaned fine types vs merged Pelka labels.
# The two tables are aligned by label name rather than by position, because the
# factors do not have to list their levels in the same order (or have the same
# number of levels). A label missing from one side shows NA.
cleanedCounts = table(scRNA_Strom@meta.data$cleaned_fine_types)
pelkaCounts = table(scRNA_Strom@meta.data$mergedClusters)
allLabels = union(names(cleanedCounts), names(pelkaCounts))
data.frame(
    "Cleaned Fine Types" = as.vector(cleanedCounts)[match(allLabels, names(cleanedCounts))],
    "Pelka Labels" = as.vector(pelkaCounts)[match(allLabels, names(pelkaCounts))],
    row.names = allLabels,
    check.names = FALSE
)

In [None]:
# Concordance heatmap (no cell labels): original Pelka fine types (rows)
# against the sub-clustered Louvain labels (columns). Each row is turned into
# proportions across clusters and the matrix is then passed through scale().
# Rows and columns are ordered by Ward.D clustering on Euclidean distances.
options(repr.plot.width = 50, repr.plot.height = 10)
wardOrder = function(m) hclust(dist(m, method = "euclidean"), method = "ward.D")$order
oldClustersInNew3 = scale(
    prop.table(
        t(table(scRNA_Strom@meta.data$subclustered_Louvain_for_fine_types,
                scRNA_Strom@meta.data$ClusterFull)),
        1))
rowOrder = wardOrder(oldClustersInNew3)
colOrder = wardOrder(t(oldClustersInNew3))
as.data.frame(oldClustersInNew3[rowOrder, colOrder]) %>%
    rename('Fine Type' = Var1,
           'Louvain Cluster' = Var2,
           'Scaled\nProportion' = Freq) %>%
    ggplot(aes(y = `Fine Type`, x = `Louvain Cluster`)) +
    geom_tile(aes(fill = `Scaled\nProportion`), alpha = 1, color = 'black') +
    scale_fill_viridis_c(direction = -1) +
    theme_minimal(base_size = 16) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    ggtitle('Louvain to map to fine types - step 2\nMap subclustered Louvain clusters back to Pelka fine types')

## drop conflicts

In [None]:
# Keep only cells whose cleaned fine type agrees with their merged Pelka label.
# Both columns are factors that need not share a level set, and `==` on such
# factors stops with "level sets of factors are different"; comparing the
# labels as character avoids that. which() drops cells where either label is NA.
labelsAgree = as.character(scRNA_Strom@meta.data$cleaned_fine_types) ==
    as.character(scRNA_Strom@meta.data$mergedClusters)
scRNA_Strom = subset(scRNA_Strom, cells = rownames(scRNA_Strom@meta.data)[which(labelsAgree)])
rm(labelsAgree)

In [None]:
# Concordance heatmap (no cell labels) on the object as it stands at this
# point: merged Pelka types (rows) against the cleaned fine types (columns).
# Each row is turned into proportions and the matrix is passed through scale().
# Rows and columns are ordered by Ward.D clustering on Euclidean distances.
# NOTE(review): the title and the 'Louvain Cluster' axis label are carried over
# from the earlier heatmaps although the columns here are cleaned fine types.
options(repr.plot.width = 50, repr.plot.height = 10)
wardOrder = function(m) hclust(dist(m, method = "euclidean"), method = "ward.D")$order
oldClustersInNew3 = scale(
    prop.table(
        t(table(scRNA_Strom@meta.data$cleaned_fine_types,
                scRNA_Strom@meta.data$mergedClusters)),
        1))
rowOrder = wardOrder(oldClustersInNew3)
colOrder = wardOrder(t(oldClustersInNew3))
as.data.frame(oldClustersInNew3[rowOrder, colOrder]) %>%
    rename('Fine Type' = Var1,
           'Louvain Cluster' = Var2,
           'Scaled\nProportion' = Freq) %>%
    ggplot(aes(y = `Fine Type`, x = `Louvain Cluster`)) +
    geom_tile(aes(fill = `Scaled\nProportion`), alpha = 1, color = 'black') +
    scale_fill_viridis_c(direction = -1) +
    theme_minimal(base_size = 16) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    ggtitle('Louvain to map to fine types - step 2\nMap subclustered Louvain clusters back to Pelka fine types')

In [None]:
scRNA_Strom

## Justify new labels
- Once we have labels, we want to justify them to collaborators
- DGE analysis in original labels (post-QC cells) 
- DGE analysis in new labels 
- Correlate logFC in matched clusters

### DGE analysis in original labels (post-QC cells) 

In [None]:
# presto wilcoxauc results grouped by the merged Pelka labels, then the top
# markers using padj = 0.05 and auc_min = 0.6 with no cap per group.
clusterFull_markers = presto::wilcoxauc(scRNA_Strom, 'mergedClusters')
clusterFull_markers %>%
    top_markers(n = Inf, padj = 0.05, auc_min = 0.6)

### DGE analysis in new labels 

In [None]:
# presto wilcoxauc results grouped by the cleaned fine types, then the top
# markers using padj = 0.05 and auc_min = 0.6 with no cap per group.
cleaned_fine_types_markers = presto::wilcoxauc(scRNA_Strom, 'cleaned_fine_types')
cleaned_fine_types_markers %>%
    top_markers(n = Inf, padj = 0.05, auc_min = 0.6)

### Correlate logFC in matched clusters

In [None]:
# Restrict the Pelka-label results to groups that also exist among the cleaned
# fine types, and tag each table with the labelling it came from.
sharedGroups = clusterFull_markers$group %in% cleaned_fine_types_markers$group
clusterFull_markers = clusterFull_markers %>% filter(sharedGroups)
rm(sharedGroups)
clusterFull_markers[['Comparison']] = 'ClusterFull'
cleaned_fine_types_markers[['Comparison']] = 'cleaned_fine_types'

In [None]:
# Pair each gene's logFC under the merged Pelka labels (clusterFull) with its
# logFC under the cleaned fine types, matched on feature and group.
comparison_markers = dplyr::left_join(x = clusterFull_markers %>% select(feature, group, logFC),
                                      y = cleaned_fine_types_markers %>% select(feature, group, logFC),
                                    by = join_by(feature, group))
colnames(comparison_markers) = c('feature', 'group', 'clusterFull', 'cleaned_fine_types')
# Linear fit of the cleaned-label logFC on the Pelka-label logFC.
getColors = coef(lm(cleaned_fine_types ~ clusterFull,
                    data = comparison_markers))
getIntercept = as.numeric(getColors)[1]
getSlope = as.numeric(getColors)[2]
comparison_markers = comparison_markers %>%
    # ye is the fitted value of the regression above, so it is evaluated at the
    # predictor (clusterFull), not at the response. color flags genes whose
    # cleaned-label logFC falls below the fitted line.
    mutate(ye = getIntercept + (getSlope*clusterFull), color = cleaned_fine_types < ye) %>%
    # labelTRUE: logFC above the 25th percentile in either labelling.
    mutate(labelTRUE = (cleaned_fine_types > quantile(cleaned_fine_types, 0.25)) | (clusterFull > quantile(clusterFull, 0.25))) %>%
    mutate(label = if_else(labelTRUE, feature, NA))
rbind(head(comparison_markers), tail(comparison_markers))

In [None]:
# Per-group scatter of logFC under the Pelka labels (x) against logFC under the
# cleaned fine types (y), with zero reference lines, repelled gene labels
# coloured by the `color` flag, and a per-facet linear fit.
# NOTE(review): labels use `feature` for every gene; the `label` column computed
# alongside comparison_markers is not used here - confirm which was intended.
options(repr.plot.width=40, repr.plot.height=30)
ggplot(data = comparison_markers,
       mapping = aes(x = clusterFull, y = cleaned_fine_types)) +
    geom_point(shape = '.', alpha = 0.5) +
    facet_wrap(~group) +
    theme_pubr(base_size = 16) +
    xlab('Pelka cell types') +
    ylab('Cleaned fine types') +
    geom_hline(aes(yintercept = 0)) +
    geom_vline(aes(xintercept = 0)) +
    ggrepel::geom_label_repel(aes(label = feature, color = color)) +
    geom_smooth(method = "lm", se = FALSE, linetype = 1, alpha = 0.15, linewidth = 0.1)

### Pearson correlations

In [None]:
# Correlation (cor() default) between the two logFC columns, computed within
# each group.
cellTypeCor = comparison_markers %>%
    group_by(group) %>%
    dplyr::summarize(correlation = cor(clusterFull, cleaned_fine_types))
cellTypeCor

## Visualize final clusters in UMAP space, and also the original labels (after merge)

In [None]:
# Harmony-UMAP coordinates with the cleaned fine types and merged Pelka labels
# attached. The join key is given explicitly instead of relying on a natural join.
options(repr.plot.width = 20, repr.plot.height = 20)
humap_embeddings = Embeddings(scRNA_Strom, 'humap') %>%
    as.data.frame() %>%
    mutate(combined_cell_names = rownames(.)) %>%
    left_join(scRNA_Strom@meta.data %>%
                  select(combined_cell_names, cleaned_fine_types, mergedClusters),
              by = 'combined_cell_names')
rbind(humap_embeddings %>% head(), humap_embeddings %>% tail())

#### Cleaned clusters

In [None]:
# Harmony UMAP with one facet per cleaned fine type; all types share the colour
# 'red' and gghighlight() is applied after faceting.
options(repr.plot.width = 21, repr.plot.height = 21)
cleanedTypes = unique(humap_embeddings$cleaned_fine_types)
constantPalette = setNames(rep('red', length(cleanedTypes)), cleanedTypes)
ggplot(data = humap_embeddings,
       mapping = aes(x = HUMAP_1, y = HUMAP_2, color = cleaned_fine_types)) +
    geom_point(shape = '.', alpha = 0.5) +
    ggpubr::theme_pubr(base_size = 13) +
    theme(legend.position = 'none') +
    scale_color_manual(values = constantPalette) +
    facet_wrap(~cleaned_fine_types) +
    gghighlight::gghighlight() +
    ggtitle('Cleaned fine types - Strom cells')

#### original clusters

In [None]:
# Harmony UMAP with one facet per merged Pelka label; all labels share the
# colour 'red' and gghighlight() is applied after faceting.
options(repr.plot.width = 21, repr.plot.height = 21)
mergedTypes = unique(humap_embeddings$mergedClusters)
constantPalette = setNames(rep('red', length(mergedTypes)), mergedTypes)
ggplot(data = humap_embeddings,
       mapping = aes(x = HUMAP_1, y = HUMAP_2, color = mergedClusters)) +
    geom_point(shape = '.', alpha = 0.5) +
    ggpubr::theme_pubr(base_size = 13) +
    theme(legend.position = 'none') +
    scale_color_manual(values = constantPalette) +
    facet_wrap(~mergedClusters) +
    gghighlight::gghighlight() +
    ggtitle('merged Pelka types - Strom cells')

## GLMM to find DE genes between clusters

In [None]:
require(tidyverse)
require(Seurat)
require(data.table)
require(lme4)
require(presto)
require(singlecellmethods)
require(future)
require(furrr)
require(gghighlight)
options(future.globals.maxSize = 1000 * 1024 ^2)
set.seed(1)

In [None]:
# Collapse the raw count matrix with presto::collapse_counts, grouping cells by
# biosample_id and cleaned_fine_types (min_cells_per_group = 3), then inspect
# the returned metadata and collapsed matrix.
# The collapsed matrix is referred to by its full element name, counts_mat;
# `pb$counts` only worked through `$` partial matching on the list.
require(presto)
require(singlecellmethods)
pb = presto::collapse_counts(
    GetAssayData(scRNA_Strom, 'counts'),
    scRNA_Strom@meta.data,
    c("biosample_id", "cleaned_fine_types"),
    min_cells_per_group = 3
)
pb$meta_data %>% head()
dim(pb$counts_mat)

colnames(scRNA_Strom@meta.data)
dim(pb$meta_data)
head(pb$meta_data)
dim(pb$counts_mat)

In [None]:
# Re-index pb$exprs_norm so its rows and columns follow pb$counts_mat.
# NOTE(review): nothing visible in this notebook assigns pb$exprs_norm before
# this point; if it is NULL here the subset below is NULL as well - confirm that
# a normalisation step has not been lost.
pbRowNames = rownames(pb$counts_mat)
pbColNames = colnames(pb$counts_mat)
pb$exprs_norm = pb$exprs_norm[pbRowNames, pbColNames]
dim(pb$exprs_norm)
pb$exprs_norm[1:5, 1:5]

In [None]:
# Load a previously saved result from 'Strom_fineTyping_GLMM.rds'.
# NOTE(review): the code that fits and writes this object is not in this
# notebook as shown - presumably the presto GLMM fit on the pseudobulk above;
# confirm it was produced from the same cleaned_fine_types labels.
presto_res = readr::read_rds('Strom_fineTyping_GLMM.rds')

### Make contrasts

In [None]:
contrasts_mat = make_contrast.presto(
    presto_res, 
    var_contrast = 'cleaned_fine_types')
contrasts_mat

### Find marginal effects

In [None]:
# Marginal effects per contrast (one-tailed), with beta and sigma converted
# from natural-log to log2 units, a z-score (logFC / SD), and rows sorted by
# p-value.
effects_marginal = contrasts.presto(presto_res, contrasts_mat, one_tailed = TRUE) %>%
    dplyr::mutate(
        cluster = contrast,
        logFC = sign(beta) * log2(exp(abs(beta))),
        SD = log2(exp(sigma)),
        zscore = logFC / SD
    ) %>%
    arrange(pvalue)

# Benjamini-Hochberg FDR. Exact zeros are replaced by the smallest non-zero
# FDR so that -log10 stays finite.
effects_marginal$fdr = p.adjust(effects_marginal$pvalue, method = 'BH')
zeroFdr = effects_marginal$fdr == 0
smallestNonzeroFdr = min(effects_marginal$fdr[effects_marginal$fdr != 0])
effects_marginal$corr_fdr = effects_marginal$fdr
effects_marginal$corr_fdr[zeroFdr] = smallestNonzeroFdr
effects_marginal$`-log10_fdr` = -log10(effects_marginal$corr_fdr)
rm(zeroFdr, smallestNonzeroFdr)
dim(effects_marginal)
head(effects_marginal)


In [None]:
unique(effects_marginal$cluster)
unique(scRNA_Strom@meta.data$ClusterFull)

In [None]:
# Per gene: mean of the 'data' layer over all cells, plus one column per
# cluster in effects_marginal holding the fraction of that cluster's cells with
# a zero count for the gene.
# Zero counts are obtained from sparse row sums (cells minus non-zero entries)
# instead of converting the count matrix to a data frame and scanning it
# row-wise, which gives the same numbers at a fraction of the cost.
meanExp = Matrix::rowMeans(GetAssayData(scRNA_Strom, 'data'))
meanExp = data.frame(feature = names(meanExp), meanExp = meanExp)
stromCounts = GetAssayData(scRNA_Strom, 'counts')
for (cluster in unique(effects_marginal$cluster)) {
    print(cluster)
    # assumes sampleID holds the cell names (count-matrix column names) - TODO confirm
    clusterCells = scRNA_Strom@meta.data$sampleID[scRNA_Strom@meta.data$cleaned_fine_types == cluster]
    clusterCounts = stromCounts[, clusterCells, drop = FALSE]
    print(dim(clusterCounts))
    nZeros = ncol(clusterCounts) - Matrix::rowSums(clusterCounts != 0)
    meanExp[, cluster] = as.numeric(nZeros) / length(clusterCells)
}
rm(stromCounts)
head(meanExp)

#effects_marginal = effects_marginal %>% left_join(meanExp, by = join_by(feature), relationship = 'many-to-one') 
#tail(effects_marginal)

In [None]:
# Reshape the per-cluster zero fractions to long form (one row per
# gene x cluster), convert them to the fraction of cells with a non-zero count,
# and attach meanExp / prop.expressed to the matching feature-contrast rows.
# The cluster columns are selected with all_of() (a bare external vector in a
# tidyselect context is deprecated) and the output columns are named directly
# rather than by position.
temp = meanExp %>%
    pivot_longer(cols = all_of(as.character(unique(effects_marginal$cluster))),
                 names_to = 'contrast',
                 values_to = 'prop.expressed') %>%
    mutate(prop.expressed = 1 - prop.expressed)
effects_marginal = left_join(effects_marginal, temp, by = join_by(feature, contrast))
rm(temp)
head(effects_marginal)

## Volcano plot

In [None]:
# Number of feature-contrast rows above each logFC threshold, then per cluster
# the median logFC and row count among rows with fdr < 0.05.
sum(effects_marginal$logFC > 2)
sum(effects_marginal$logFC > 0.5)
sum(effects_marginal$logFC > 0.1)
effects_marginal %>%
    filter(fdr < 0.05) %>%
    group_by(cluster) %>%
    summarize(medianLFC = median(logFC), n = n())

In [None]:
# Volcano plot per cluster: logFC against -log10(FDR). Rows with FDR below 0.05
# are drawn in blue, rows with logFC > 2 in red and labelled with the gene name;
# grey reference lines sit at FDR = 0.05 and logFC = 2.
options(repr.plot.width = 20, repr.plot.height = 20, repr.plot.res = 200)
require(ggrepel)
significantRows = effects_marginal[effects_marginal$`-log10_fdr` > (-1) * log10(0.05),]
strongRows = effects_marginal[effects_marginal$logFC > 2,]
ggplot(data = effects_marginal, mapping = aes(x = logFC, y = `-log10_fdr`)) +
    geom_point(shape = '.') +
    geom_point(data = significantRows, color = 'blue', shape = 16) +
    geom_point(data = strongRows, color = 'red', shape = 16) +
    facet_wrap(~cluster, scales = 'free_y', ncol = 3) +
    theme_bw(base_size = 20) +
    geom_hline(aes(yintercept = (-1)*log10(0.05)), color = 'lightgrey') +
    geom_vline(aes(xintercept = 2), color = 'lightgrey') +
    geom_label_repel(data = strongRows, aes(label = feature))

In [None]:
#effects_marginal$`-log10_fdr`[effects_marginal$`-log10_fdr` > 50] = 50

## MA plot

In [None]:
# MA-style plot per cluster: logFC against meanExp.
# Red = rows with logFC > 2, blue = rows with meanExp > 1; both are labelled.
# Highlight layers take a function as `data`, applied by ggplot2 to the plot
# data, so the subsets equal direct indexing of effects_marginal.
ggplot(effects_marginal, aes(x = meanExp, y = logFC)) +
    geom_point(shape = '.') +
    geom_point(data = function(d) d[d$logFC > 2, ], color = 'red', shape = 16) +
    geom_label_repel(data = function(d) d[d$logFC > 2, ], aes(label = feature)) +
    geom_point(data = function(d) d[d$meanExp > 1, ], color = 'blue', shape = 16) +
    geom_label_repel(data = function(d) d[d$meanExp > 1, ], aes(label = feature)) +
    facet_wrap(~cluster) +
    theme_bw(base_size = 20) +
    labs(x = 'Mean normalized expression')

In [None]:
# Distribution of meanExp per cluster, with a log10 count axis
effects_marginal %>%
    ggplot(aes(x = meanExp)) +
    geom_histogram() +
    scale_y_continuous(trans = 'log10') +
    facet_wrap(~cluster) +
    labs(x = 'Mean expression', y = 'Frequency') +
    theme_bw(base_size = 20)

In [None]:
# Cluster labels in the DE table, then the ClusterFull labels in the object
effects_marginal$cluster %>% unique()
unique(scRNA_Strom@meta.data$ClusterFull)

In [None]:
# logFC against the proportion of cells expressing the gene, per cluster.
# Red = rows with logFC > 2, blue = rows with prop.expressed > 0.5.
options(repr.plot.height = 40, repr.plot.width = 30)
ggplot(effects_marginal, aes(x = prop.expressed, y = logFC)) +
    geom_point(shape = '.') +
    geom_point(data = function(d) d[d$logFC > 2, ], color = 'red', shape = 16) +
    geom_label_repel(
        data = function(d) d[d$logFC > 2, ],
        aes(label = feature), color = 'red'
    ) +
    geom_point(data = function(d) d[d$prop.expressed > 0.5, ], color = 'blue', shape = 16) +
    geom_label_repel(
        data = function(d) d[d$prop.expressed > 0.5, ],
        aes(label = feature), color = 'blue'
    ) +
    facet_wrap(~cluster) +
    theme_bw(base_size = 20) +
    labs(x = 'Proportions of cells expressing the gene')

In [None]:
# Save the annotated DE table to the working directory
write.csv(x = effects_marginal, file = 'effects_marginal_Strom.csv')

## filters 

In [None]:
# Preview the first rows of the DE table
effects_marginal %>% head()

In [None]:
# Keep rows with logFC > 0.1 and prop.expressed > 0.05.
# NOTE(review): the gene-ranking cells further down read effects_marginal, not
# effects_marginal2 - confirm whether this filtered table should be used there.
effects_marginal2 = effects_marginal %>%
    filter(
        logFC > 0.1,
        prop.expressed > 0.05
    )

In [None]:
# Rows and columns of the unfiltered DE table
c(nrow(effects_marginal), ncol(effects_marginal))

### Plot the number of cells retained after QC at every nGene cutoff

- x-axis: number of genes kept
- y-axis: number of cells kept

In [None]:
# Genes x cells in the object, then the number of distinct genes in the DE table
dim(scRNA_Strom)
length(unique(effects_marginal$feature))

In [None]:
# Rank genes by their largest z-score across all rows of effects_marginal.
# One row per gene, sorted from highest to lowest maximum z-score.
genes_ranked_by_max_zscore = effects_marginal %>% 
        group_by(feature) %>% 
        mutate(max_zscore = max(zscore)) %>% 
        # drop the grouping so the result is a plain table rather than a
        # tibble carrying one group per gene
        ungroup() %>%
        distinct(feature, max_zscore) %>%
        arrange(desc(max_zscore))
dim(genes_ranked_by_max_zscore)
# show both ends of the ranking
rbind(head(genes_ranked_by_max_zscore), tail(genes_ranked_by_max_zscore))

In [None]:
# Candidate panel sizes: the top 100, 125, 150, ... ranked genes
nGenes_used = seq(from = 100, to = nrow(genes_ranked_by_max_zscore), by = 25)
nGenes_used
countMat = GetAssayData(scRNA_Strom, 'counts')
dim(countMat)
# One slot per panel size, filled with the number of cells passing QC
nCells_left = rep(NA, length(nGenes_used))
names(nCells_left) = as.character(nGenes_used)
for (panelSize in nGenes_used) {
    selectedGenes = head(genes_ranked_by_max_zscore$feature, panelSize)
    print(length(unique(selectedGenes)))
    panelCounts = countMat[selectedGenes,]
    # a cell passes when it has more than 10 detected panel genes and more
    # than 30 panel counts
    passesQC = colSums(panelCounts > 0) > 10 & colSums(panelCounts) > 30
    nCells_left[[as.character(panelSize)]] = sum(passesQC)
}

options(repr.plot.width=9, repr.plot.height=9)
# Number of cells passing QC for each number of top-ranked genes kept.
# x is a factor whose levels follow the order of nCells_left (increasing panel
# size); a plain character column would be ordered alphabetically on the axis
# ("1000" before "125").
data.frame(
    nGenes_used = factor(names(nCells_left), levels = names(nCells_left)),
    nCells_left = unname(nCells_left)
) %>%
ggplot(aes(x = nGenes_used, y = nCells_left)) + 
geom_point() + 
ggpubr::theme_pubr() + 
scale_y_continuous(trans='log10') + 
xlab('Number of genes used') + 
ylab('Number of cells left') + 
# dotted drop line from each point
geom_segment(aes(xend = nGenes_used), 
             yend=0, 
             linetype=3) + 
# title names the Strom subset this notebook processes
ggtitle('GLMM DEGs - Strom cells') 

### Plot the number of cells retained after QC at every nGene cutoff

- x-axis: number of genes kept
- y-axis: number of cells kept

In [None]:
# Genes x cells in the object, then the number of distinct genes in the DE table
dim(scRNA_Strom)
length(unique(effects_marginal$feature))

In [None]:
# Rank genes by their largest z-score across all rows of effects_marginal.
# One row per gene, sorted from highest to lowest maximum z-score.
genes_ranked_by_max_zscore = effects_marginal %>% 
        group_by(feature) %>% 
        mutate(max_zscore = max(zscore)) %>% 
        # drop the grouping so the result is a plain table rather than a
        # tibble carrying one group per gene
        ungroup() %>%
        distinct(feature, max_zscore) %>%
        arrange(desc(max_zscore))
dim(genes_ranked_by_max_zscore)
# show both ends of the ranking
rbind(head(genes_ranked_by_max_zscore), tail(genes_ranked_by_max_zscore))

In [None]:
# Candidate panel sizes: the top 100, 125, 150, ... ranked genes
nGenes_used = seq(from = 100, to = nrow(genes_ranked_by_max_zscore), by = 25)
nGenes_used
countMat = GetAssayData(scRNA_Strom, 'counts')
dim(countMat)
# One slot per panel size, filled with the number of cells passing QC
nCells_left = rep(NA, length(nGenes_used))
names(nCells_left) = as.character(nGenes_used)
for (panelSize in nGenes_used) {
    selectedGenes = head(genes_ranked_by_max_zscore$feature, panelSize)
    print(length(unique(selectedGenes)))
    panelCounts = countMat[selectedGenes,]
    # a cell passes when it has more than 10 detected panel genes and more
    # than 30 panel counts
    passesQC = colSums(panelCounts > 0) > 10 & colSums(panelCounts) > 30
    nCells_left[[as.character(panelSize)]] = sum(passesQC)
}

options(repr.plot.width=9, repr.plot.height=9)
# Number of cells passing QC for each number of top-ranked genes kept.
# x is a factor whose levels follow the order of nCells_left (increasing panel
# size); a plain character column would be ordered alphabetically on the axis
# ("1000" before "125").
data.frame(
    nGenes_used = factor(names(nCells_left), levels = names(nCells_left)),
    nCells_left = unname(nCells_left)
) %>%
ggplot(aes(x = nGenes_used, y = nCells_left)) + 
geom_point() + 
ggpubr::theme_pubr() + 
scale_y_continuous(trans='log10') + 
xlab('Number of genes used') + 
ylab('Number of cells left') + 
# dotted drop line from each point
geom_segment(aes(xend = nGenes_used), 
             yend=0, 
             linetype=3) + 
ggtitle('GLMM DEGs - Strom cells') 

## Select 175 genes and cells with counts > 30 and features > 10

In [None]:
# Keep the 175 top-ranked genes (highest maximum z-score first)
selectedGenes = genes_ranked_by_max_zscore[['feature']][seq_len(175)]
length(selectedGenes)
selectedGenes

In [None]:
# Restrict the Strom object to the selected genes (all cells kept)
scRNA_Strom_selected_genes = subset(x = scRNA_Strom, features = selectedGenes)

In [None]:
# Cells that pass QC on the selected gene panel: more than 10 panel genes
# detected and more than 30 panel counts.
# The thresholds are applied to counts recomputed from the panel itself rather
# than to the nFeature_RNA / nCount_RNA metadata columns, which may still
# describe the full gene set the object was created with.
# NOTE(review): assumes feature subsetting does not refresh those metadata
# columns - confirm for the installed Seurat version. If it does refresh them,
# this computation gives the same cells.
panelCounts = GetAssayData(scRNA_Strom, 'counts')[selectedGenes, , drop = FALSE]
panelPass = Matrix::colSums(panelCounts > 0) > 10 & Matrix::colSums(panelCounts) > 30
# the object was built with combined_cell_names as its column names, so the
# column names of the counts are the cell identifiers used for subsetting
selectedCells = colnames(panelCounts)[panelPass]
length(selectedCells)
c(head(selectedCells), tail(selectedCells))

In [None]:
# Final reference object: selected genes x selected cells.
scRNA_Strom_selected_genes = scRNA_Strom[selectedGenes, selectedCells]
# Recompute the per-cell QC metrics from the subsetted counts so the summaries
# below, and any later step reading these columns, describe the gene panel.
# NOTE(review): assumes subsetting leaves nCount_RNA / nFeature_RNA at their
# pre-subset values - confirm. If they are already refreshed, this is a no-op.
panelCounts = GetAssayData(scRNA_Strom_selected_genes, 'counts')
scRNA_Strom_selected_genes$nCount_RNA = Matrix::colSums(panelCounts)
scRNA_Strom_selected_genes$nFeature_RNA = Matrix::colSums(panelCounts > 0)
summary(scRNA_Strom_selected_genes@meta.data$nFeature_RNA)
summary(scRNA_Strom_selected_genes@meta.data$nCount_RNA)

In [None]:
options(repr.plot.height = 7, repr.plot.width = 14)
# Side-by-side histograms (log10 count axis) of the two per-cell QC columns of
# the selected-genes object; the two plots are combined with `+`.
featureHistogram = ggplot(scRNA_Strom_selected_genes@meta.data, aes(x = nFeature_RNA)) +
    geom_histogram() +
    scale_y_continuous(trans = 'log10') +
    ggtitle('nFeature_RNA')
countHistogram = ggplot(scRNA_Strom_selected_genes@meta.data, aes(x = nCount_RNA)) +
    geom_histogram() +
    scale_y_continuous(trans = 'log10') +
    ggtitle('nCount_RNA')
featureHistogram + countHistogram

## Cache

### Strom cells with all genes

In [None]:
# Cache the Strom object with all genes in the working directory
scRNA_Strom %>% readr::write_rds('Strom_fineTyping_all_genes.rds')

### Strom cells with selected genes

In [None]:
# Cache the gene- and cell-filtered Strom object in the working directory
scRNA_Strom_selected_genes %>% readr::write_rds('Strom_fineTyping_selected_genes.rds')

In [None]:
# Number of retained cells per cleaned fine type
table(scRNA_Strom_selected_genes@meta.data[['cleaned_fine_types']])

In [None]:
# Show the working directory, which is where the relative-path .rds and .csv
# files of this notebook are written and read
getwd()

In [None]:
# Reload the cached all-genes Strom object
scRNA_Strom = 'Strom_fineTyping_all_genes.rds' %>% readr::read_rds()

In [None]:
#scRNA_Strom = readr::read_rds('Strom_fineTyping_selected_genes.rds')

In [None]:
# Gene names present in the reloaded object
scRNA_Strom %>% rownames()

In [None]:
# Expression of three genes on the object's embedding
FeaturePlot(
    object = scRNA_Strom,
    features = c('MFAP5', 'SFRP2', 'CCL19')
)

In [None]:
# Embedding coloured by the cleaned fine-type labels
DimPlot(
    object = scRNA_Strom,
    group.by = 'cleaned_fine_types'
)

In [None]:
# Embedding coloured by ClusterFull; labels are drawn on the plot, so the
# colour legend is switched off
DimPlot(
    object = scRNA_Strom,
    group.by = 'ClusterFull',
    label = TRUE
) +
    guides(color = "none")

In [None]:
# Keep the 275 top-ranked genes (highest maximum z-score first)
selectedGenes = genes_ranked_by_max_zscore[['feature']][seq_len(275)]
length(selectedGenes)
selectedGenes

## Subclustering: are smooth muscle cells a combination of 2 clusters? (Ilya: NOTCH3 and GJA4 suggest VSMC and other SMCs)

In [None]:
# Reload the cached selected-genes object and print its summary
scRNA_Strom_selected_genes = 'Strom_fineTyping_selected_genes.rds' %>% readr::read_rds()
scRNA_Strom_selected_genes

In [None]:
# ClusterFull labels present in the selected-genes object
unique(scRNA_Strom_selected_genes@meta.data$ClusterFull)

In [None]:
# Scale factor for log-normalisation: the mean, across orig.ident groups, of
# each group's median nCount_RNA.
medianCountsPerSample = scRNA_Strom_selected_genes@meta.data %>%
    group_by(orig.ident) %>%
    summarize(medianCounts = median(nCount_RNA))
normFactor = mean(medianCountsPerSample$medianCounts) # mean of the per-group medians
normFactor
# Log-normalise, scale every gene of the object, then run a PCA with 15
# components weighted by orig.ident.
scRNA_Strom_selected_genes = NormalizeData(
    scRNA_Strom_selected_genes,
    normalization.method = "LogNormalize",
    scale.factor = normFactor
)
scRNA_Strom_selected_genes = ScaleData(
    scRNA_Strom_selected_genes,
    features = rownames(scRNA_Strom_selected_genes)
)
scRNA_Strom_selected_genes = singlecellmethods::RunBalancedPCA(
    scRNA_Strom_selected_genes,
    weight.by='orig.ident',
    npcs=15
)
scRNA_Strom_selected_genes

options(repr.plot.width = 5, repr.plot.height = 5)
# Run Harmony with orig.ident as the grouping variable and plot convergence.
# early_stop is spelled FALSE: `F` is an ordinary variable that can be
# reassigned, `FALSE` is a reserved constant.
scRNA_Strom_selected_genes = scRNA_Strom_selected_genes %>% harmony::RunHarmony("orig.ident", 
    plot_convergence = TRUE, 
    lambda = NULL,
    max.iter = 10,
    early_stop = FALSE
) 

# UMAP on the first 15 Harmony dimensions; ret_extra = 'fgraph' also returns
# uwot's graph, which is stored on the object for sub-clustering.
U = uwot::umap(scRNA_Strom_selected_genes@reductions$harmony@cell.embeddings[, 1:15], 
               min_dist = 0.05, 
               spread = 0.30, 
               ret_extra = 'fgraph', 
               # namespace-qualified so the call does not depend on the
               # `future` package being attached
               n_sgd_threads = future::nbrOfWorkers(), 
               fast_sgd = TRUE)
# Axis names follow the reduction key ('HUMAP_') so they already match the
# HUMAP_1 / HUMAP_2 columns that the plots in this cell refer to
colnames(U$embedding) = c('HUMAP_1', 'HUMAP_2')
rownames(U$fgraph) = colnames(U$fgraph) = Cells(scRNA_Strom_selected_genes)
# Register the embedding as the 'humap' reduction
scRNA_Strom_selected_genes[['humap']] = Seurat::CreateDimReducObject(
    embeddings = U$embedding,
    assay = 'RNA', 
    key = 'HUMAP_', 
    global = TRUE
)
# Store the graph under 'humap_fgraph', tied to the object's default assay
new_graph = Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) = DefaultAssay(scRNA_Strom_selected_genes)
scRNA_Strom_selected_genes[['humap_fgraph']] = new_graph

# Per-cell table of UMAP coordinates joined with ClusterFull and orig.ident.
# The join key is stated explicitly instead of relying on whichever column
# names the two tables happen to share.
humap_embeddings = Embeddings(scRNA_Strom_selected_genes, 'humap') %>% 
    as.data.frame() %>%
    mutate(combined_cell_names = rownames(.)) %>%
    left_join(., scRNA_Strom_selected_genes@meta.data %>% 
              select(combined_cell_names, ClusterFull, orig.ident),
              by = 'combined_cell_names')
rbind(humap_embeddings %>% head(), humap_embeddings %>% tail())

options(repr.plot.width = 20, repr.plot.height = 20)
# One facet per orig.ident; gghighlight highlights each facet's own cells.
# Every orig.ident value is mapped to the same colour, red.
constantPalette = setNames(
    rep('red', length(unique(humap_embeddings$orig.ident))),
    unique(humap_embeddings$orig.ident)
)
humap_embeddings %>%
    ggplot(aes(x = HUMAP_1, y = HUMAP_2, color = orig.ident)) +
    geom_point(shape = '.', alpha = 0.5) +
    scale_color_manual(values = constantPalette) +
    facet_wrap(~orig.ident) +
    gghighlight::gghighlight() +
    ggtitle('Post-harmony, post-filter (step 1) UMAP - Strom cells') +
    ggpubr::theme_pubr(base_size=10) +
    theme(legend.position = 'none')

options(repr.plot.width = 10, repr.plot.height = 7)
# One facet per ClusterFull label; gghighlight highlights each facet's own
# cells. Every ClusterFull value is mapped to the same colour, red.
constantPalette = setNames(
    rep('red', length(unique(humap_embeddings$ClusterFull))),
    unique(humap_embeddings$ClusterFull)
)
humap_embeddings %>%
    ggplot(aes(x = HUMAP_1, y = HUMAP_2, color = ClusterFull)) +
    geom_point(shape = '.', alpha = 0.5) +
    scale_color_manual(values = constantPalette) +
    facet_wrap(~ClusterFull) +
    gghighlight::gghighlight() +
    ggtitle('Post-harmony, post-filter (step 1) UMAP - Strom cells') +
    ggpubr::theme_pubr(base_size=10) +
    theme(legend.position = 'none')

In [None]:
# Sub-cluster the smooth-muscle cluster on the stored UMAP graph.
cluster = "cS32 (Smooth Muscle)"
# metadata column that will hold the sub-cluster labels (syntactic name)
subclusterColumn = make.names(paste0("sub.cluster_", cluster))

Idents(scRNA_Strom_selected_genes) = 'ClusterFull'

scRNA_Strom_selected_genes = FindSubCluster(
    object = scRNA_Strom_selected_genes,
    cluster = cluster,
    graph.name = 'humap_fgraph',
    subcluster.name = subclusterColumn,
    resolution = 0.5,
    algorithm = 1
)

# switch the active identities to the new sub-cluster column
Idents(scRNA_Strom_selected_genes) = subclusterColumn

In [None]:
# Keep only the cells labelled as smooth muscle in ClusterFull
smoothMuscleCells = subset(
    x = scRNA_Strom_selected_genes,
    subset = ClusterFull == "cS32 (Smooth Muscle)"
)

In [None]:
# Technology labels present among the smooth-muscle cells
unique(smoothMuscleCells@meta.data$technology)

In [None]:
require(presto)
# Use the sub-cluster metadata column as the active identities
Idents(smoothMuscleCells) = make.names(paste0("sub.cluster_", cluster))

In [None]:
# Distinct sub-cluster identities among the smooth-muscle cells
smoothMuscleCells %>% Idents() %>% unique()

In [None]:
# Wilcoxon/AUC test on the smooth-muscle cells, then all markers (n = Inf)
# with adjusted p-value cutoff 0.05 and AUC of at least 0.55
smoothMuscleCells %>%
    wilcoxauc() %>%
    top_markers(
        n = Inf,
        padj = 0.05,
        auc_min = 0.55
    )

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10, repr.plot.res = 300)
# Left: smooth-muscle cells coloured and labelled by identity (Tableau 20).
# Right: NOTCH3 and GJA4 expression. The two plots are combined with `+`.
subclusterPanel = DimPlot(smoothMuscleCells, label = TRUE, repel = TRUE, label.box = TRUE) +
    scale_color_tableau('Tableau 20') +
    scale_fill_tableau('Tableau 20')
markerPanel = FeaturePlot(smoothMuscleCells, features = c('NOTCH3', 'GJA4'))
subclusterPanel + markerPanel

In [None]:
# Number of cells per ClusterFull label in the all-genes object
table(scRNA_Strom@meta.data[['ClusterFull']])