# Myeloid: subcluster macrophage and monocyte cells. Enrich subcluster marker gene profiles with myeloid NMF programs and, when possible, label subclusters with major NMF programs.

In [None]:
require(Seurat)
require(tidyverse)
require(readxl)
require(patchwork)
require(sf)
require(ggpubr)
require(ggthemes)
require(harmony)
require(presto)
require(ComplexHeatmap)
require(circlize)
require(glue)
require(e1071) 
require(caTools) 
require(class) 
require(gghighlight)
require(tidyverse)
require(Seurat)
require(data.table)
require(lme4)
require(presto)
require(singlecellmethods)
require(future)
require(furrr)
require(gghighlight)
require(readr)
require(clusterProfiler)
# Raise the size limit for globals exported by the future framework to 1000 * 1024^2 bytes.
options(future.globals.maxSize = 1000 * 1024 ^2)
require(gridExtra)
# Fix the random seed for the session.
set.seed(1)
# Notebook display settings: plot resolution and the maximum number of
# table columns / rows rendered.
options(repr.plot.res=300)
options(repr.matrix.max.cols=50, repr.matrix.max.rows=10000)

In [None]:
# Record the R version and loaded package versions for this run.
sessionInfo()

## Load reference myeloid cells

In [None]:
# Source the shared fine-typing helper script and call its loadLibraries()
# (presumably attaches the packages the helpers need -- confirm in that script).
source("/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Fine_typing_with_weighted_KNN/Strom/fine_typing_libraries.r")
loadLibraries()
set.seed(1)

# Reference myeloid object. Tag every cell with its technology and copy the
# cell names into a metadata column so they can be used as a join key later.
scRNA_Myeloid <- readr::read_rds("/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Pelka_reference_cleaning/Myeloid_fineTyping_all_genes.rds")
scRNA_Myeloid@meta.data[["technology"]] <- "scRNA"
scRNA_Myeloid@meta.data[["combined_cell_names"]] <- colnames(scRNA_Myeloid)

cellType <- "Myeloid"

# All genes of the reference object.
selectedGenes <- rownames(scRNA_Myeloid)


## Healthy and tumor Myeloid fine types

In [None]:
# Cells per biosample for every Pelka fine type (ClusterFull), comparing Healthy
# (MMRStatus is NA) against Tumor (any non-NA MMRStatus) on a log10 axis.
options(repr.plot.width = 20, repr.plot.height = 10)

scRNA_Myeloid@meta.data %>%
  mutate(MMRStatus = ifelse(is.na(MMRStatus), "Healthy", "Tumor")) %>%
  group_by(ClusterFull, MMRStatus, biosample_id) %>%
  summarize(n = n()) %>%
  ggboxplot(
    x = "MMRStatus", y = "n",
    color = "MMRStatus", shape = "MMRStatus",
    palette = c("#00AFBB", "#E7B800", "#FC4E07"),
    add = "jitter"
  ) +
  scale_y_continuous(trans = "log10") +
  facet_wrap(~ClusterFull, scales = "free_x") +
  # Comparison across groups with stat_compare_means()'s default method.
  stat_compare_means() +
  # t-test of each group against the Healthy reference, shown as p.signif labels.
  stat_compare_means(aes(label = after_stat(p.signif)), method = "t.test", ref.group = "Healthy")

In [None]:
# Cells per biosample for every Pelka fine type (ClusterFull), split by
# MMRStatus; samples with a missing MMRStatus are labelled "Healthy".
options(repr.plot.width = 20, repr.plot.height = 10)

scRNA_Myeloid@meta.data %>%
  # Vectorised NA replacement. as.character() keeps the original labels even
  # when MMRStatus is stored as a factor (element-wise unlist() of mixed
  # factor/character values would fall back to the integer codes).
  mutate(MMRStatus = coalesce(as.character(MMRStatus), "Healthy")) %>%
  group_by(ClusterFull, MMRStatus, biosample_id) %>%
  summarize(n = n()) %>%
  ggboxplot(
    x = "MMRStatus", y = "n",
    color = "MMRStatus", shape = "MMRStatus",
    palette = c("#00AFBB", "#E7B800", "#FC4E07"),
    add = "jitter"
  ) +
  scale_y_continuous(trans = "log10") +
  facet_wrap(~ClusterFull, scales = "free_x") +
  # Comparison across groups with stat_compare_means()'s default method.
  stat_compare_means() +
  # t-test of each group against the Healthy reference, shown as p.signif labels.
  stat_compare_means(aes(label = after_stat(p.signif)), method = "t.test", ref.group = "Healthy")

## Load coarse typed objects

In [None]:
# Coarse-typed Seurat objects written by the coarse-typing step (MSI samples).
coarse_typing_dir <- '/n/scratch/users/m/mup728/mup728/Pelka_Baysor_segmentation/coarse_typing_with_weighted_knn/Coarse_typing_with_weighted_knn/MSI/'

# List the directory once; the dot is escaped and the extension anchored so that
# only files that really end in ".rds" are picked up.
coarseTypedSeurats <- list.files(coarse_typing_dir, pattern = "coarse.*\\.rds$")

# Sample IDs are the file names without the "coarse_" prefix and ".rds"
# extension, returned in a random order (sample() permutes the vector).
sampleIDs <- sample(gsub(x = coarseTypedSeurats, pattern = "coarse_|\\.rds$", replacement = ""))

In [None]:
# Show the sample IDs derived from the coarse-typed file names.
sampleIDs

## Visualize on UMAP

In [None]:
# Print the reference object summary.
scRNA_Myeloid

In [None]:
# 2-D UMAP computed on the first 15 Harmony dimensions. `ret_extra = "fgraph"`
# also returns uwot's fuzzy graph, which is stored on the object below and used
# as the graph for sub-clustering.
U <- uwot::umap(
  Embeddings(scRNA_Myeloid, "harmony")[, 1:15],
  min_dist = 0.05,
  spread = 0.30,
  ret_extra = "fgraph",
  n_sgd_threads = nbrOfWorkers(),
  fast_sgd = TRUE
)
colnames(U$embedding) <- c("HUMAP1", "HUMAP2")
# Label both dimensions of the graph with the cell names.
dimnames(U$fgraph) <- list(Cells(scRNA_Myeloid), Cells(scRNA_Myeloid))

# Register the embedding as the "humap" reduction.
# NOTE(review): downstream cells read the columns as HUMAP_1 / HUMAP_2, so the
# column names set above appear to be rewritten from the key -- confirm.
scRNA_Myeloid[["humap"]] <- Seurat::CreateDimReducObject(
  embeddings = U$embedding,
  assay = "RNA",
  key = "HUMAP_",
  global = TRUE
)

# Register the fuzzy graph under the object's default assay.
new_graph <- Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) <- DefaultAssay(scRNA_Myeloid)
scRNA_Myeloid[["humap_fgraph"]] <- new_graph

In [None]:
# Number of cells per Pelka fine type.
scRNA_Myeloid@meta.data$ClusterFull %>% table()

In [None]:
# UMAP coordinates with the cell names as an explicit column for joining.
temp <- Embeddings(scRNA_Myeloid, 'humap') %>% as.data.frame()
temp$combined_cell_names <- rownames(temp)

# Attach cell annotations; the join key is spelled out instead of inferred.
humap_embeddings <- right_join(
  scRNA_Myeloid@meta.data %>% select(combined_cell_names, ClusterTop, ClusterFull, biosample_id),
  temp,
  by = "combined_cell_names"
)

options(repr.plot.width = 10, repr.plot.height = 10)
ggplot(humap_embeddings, aes(x = HUMAP_1, y = HUMAP_2, color = ClusterFull)) +
  # alpha must lie in [0, 1]; the previous value of 2 is replaced by fully opaque.
  geom_point(shape = '.', alpha = 1) +
  ggtitle('Reference UMAP - Myeloid cells') +
  # Enlarge the legend keys; the '.' glyph used for the points is too small to read.
  guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = 'top') +
  scale_color_tableau('Tableau 20')

In [None]:
# Same reference UMAP, one facet per fine type; gghighlight() highlights each
# facet's own cells.
ggplot(
  data = humap_embeddings,
  mapping = aes(x = HUMAP_1, y = HUMAP_2, color = ClusterFull)
) +
  geom_point(shape = ".", alpha = 1) +
  ggtitle("Reference UMAP - Myeloid cells") +
  guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "top") +
  scale_color_tableau("Tableau 20") +
  facet_wrap(~ClusterFull) +
  gghighlight()

## Get NMF programs

In [None]:
# Read the "D. Program top genes" sheet and keep a CSV copy of the full table.
all_nmf_programs <- read_excel(
  "/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Fine_typing_with_weighted_KNN/Table_S2_Pelka_atlas.xlsx",
  sheet = "D. Program top genes"
)
write.csv(all_nmf_programs, "all_nmf_programs.csv")

# Keep the columns whose name contains "pM" followed by a digit.
myeloid_nmf_programs <- all_nmf_programs[, grep("pM\\d", colnames(all_nmf_programs), value = TRUE)]

myeloid_nmf_programs

In [None]:
# Names of the selected NMF program columns.
colnames(myeloid_nmf_programs)

## Subcluster monocyte populations

In [None]:
# Print the object summary before sub-clustering.
scRNA_Myeloid

In [None]:
# Sub-cluster the monocyte fine type on the UMAP fuzzy graph.
cluster <- "cM01 (Monocyte)"

# FindSubCluster() selects cells by identity, so use ClusterFull as identities.
Idents(scRNA_Myeloid) <- "ClusterFull"

# The result is stored in a metadata column named after the cluster, passed
# through make.names() so it is a syntactically valid column name.
scRNA_Myeloid <- FindSubCluster(
  object = scRNA_Myeloid,
  cluster = cluster,
  graph.name = "humap_fgraph",
  subcluster.name = make.names(paste0("sub.cluster_", cluster)),
  resolution = 0.5,
  algorithm = 1
)

# Switch the active identities to the new sub-cluster labels.
Idents(scRNA_Myeloid) <- make.names(paste0("sub.cluster_", cluster))

### Visualize clusters on UMAP

In [None]:
# Metadata column name under which the sub-cluster labels were stored.
make.names(paste("sub.cluster_", cluster, sep = ""))

In [None]:
# UMAP coordinates with the cell names as an explicit column for joining.
temp <- Embeddings(scRNA_Myeloid, 'humap') %>% as.data.frame()
temp$combined_cell_names <- rownames(temp)

# Attach annotations (join key spelled out) and keep monocytes only.
humap_embeddings <- right_join(
  scRNA_Myeloid@meta.data %>%
    select(combined_cell_names, ClusterTop, ClusterFull, biosample_id, sub.cluster_cM01..Monocyte.),
  temp,
  by = "combined_cell_names"
)
humap_embeddings <- humap_embeddings %>% filter(ClusterFull == "cM01 (Monocyte)")

# All monocyte sub-clusters on one panel.
p1 <- ggplot(humap_embeddings, aes(x = HUMAP_1, y = HUMAP_2, color = sub.cluster_cM01..Monocyte.)) +
  # alpha must lie in [0, 1]; the previous value of 2 is replaced by fully opaque.
  geom_point(shape = '.', alpha = 1) +
  ggtitle('UMAP - Subclustered Monocytes') +
  guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = 'top') +
  scale_color_tableau('Tableau 20')

# One facet per sub-cluster, each highlighting its own cells.
p2 <- p1 + facet_wrap(~sub.cluster_cM01..Monocyte.) + gghighlight()

options(repr.plot.width = 20, repr.plot.height = 10)
p1 + p2

### Find markers for monocyte subclusters

In [None]:
# Keep only the cells whose ClusterFull is 'cM01 (Monocyte)'.
scRNA_Monocytes = subset(scRNA_Myeloid, subset = ClusterFull == 'cM01 (Monocyte)')

In [None]:
# First metadata rows, transposed so that each metadata column is shown as a row.
scRNA_Monocytes@meta.data %>% head %>% t

In [None]:
require(presto)

# Wilcoxon / AUC statistics per gene for the object's active identities.
wilcox_Monocytes <- wilcoxauc(scRNA_Monocytes)

# All markers with AUC >= 0.55 and adjusted p <= 0.05, one column per group;
# the first column of top_markers() output is dropped.
topMarkers_Monocytes <- top_markers(
  wilcox_Monocytes,
  n = Inf,
  auc_min = 0.55,
  padj_max = 0.05
)[, -1]
topMarkers_Monocytes

In [None]:
# Rows and columns of the marker table.
dim(topMarkers_Monocytes)

### Enrich NMF programs in subcluster markers using clusterProfiler::enricher (despite the name, this is an implementation of the hypergeometric test)

In [None]:
# Long-format (term, gene) table for enricher(): one row per program / gene
# pair, with the NA padding of shorter program columns removed.
TERM2GENE <- myeloid_nmf_programs %>%
  pivot_longer(cols = everything(), names_to = "term", values_to = "gene")
TERM2GENE <- na.omit(TERM2GENE)
head(TERM2GENE)

# Size of the table: overall, restricted to genes present in the monocyte
# object (distinct rows, then all rows), and which programs remain represented.
dim(TERM2GENE)
TERM2GENE %>% filter(gene %in% rownames(scRNA_Monocytes)) %>% distinct() %>% dim()
TERM2GENE %>% filter(gene %in% rownames(scRNA_Monocytes)) %>% dim()
TERM2GENE %>% filter(gene %in% rownames(scRNA_Monocytes)) %>% select(term) %>% distinct()

In [None]:
# Marker table used as input for the enrichment below.
topMarkers_Monocytes

In [None]:
# Run enricher() on each sub-cluster's marker column against the NMF program
# gene sets, then stack the per-sub-cluster results into one tibble keyed by
# `monocyte_subcluster`.
enrichmentList_Monocytes <- colnames(topMarkers_Monocytes) %>%
  set_names() %>%
  map(function(subcluster) {
    marker_genes <- topMarkers_Monocytes[, subcluster] %>%
      as.matrix() %>%
      as.vector() %>%
      na.omit()
    as.data.frame(enricher(marker_genes, TERM2GENE = TERM2GENE))
  }) %>%
  bind_rows(.id = "monocyte_subcluster") %>%
  as_tibble()
enrichmentList_Monocytes

In [None]:
# Save the monocyte enrichment table to the working directory.
enrichmentList_Monocytes %>% write.csv('nmf_monocytes.csv')

### Relabel subclusters with top NMF programs

In [None]:
# For each monocyte sub-cluster, show the enrichment row(s) with the smallest
# adjusted p-value (top_n() keeps ties).
enrichmentList_Monocytes %>%
group_by(monocyte_subcluster) %>%
top_n(n = 1, wt = -p.adjust)

### monocytes

In [None]:
enrichmentList_Monocytes

# Exactly one program per sub-cluster: the one with the smallest adjusted
# p-value. with_ties = FALSE guarantees a single row per sub-cluster, so a
# sub-cluster can never be mapped to two different new labels.
top_nmf_monocytes <- enrichmentList_Monocytes %>%
  group_by(monocyte_subcluster) %>%
  slice_min(p.adjust, n = 1, with_ties = FALSE) %>%
  ungroup()
top_nmf_monocytes %>% head

# fct_recode() expects new_label = old_label pairs.
renamed_monocyte_programs <- setNames(
  top_nmf_monocytes$monocyte_subcluster,
  paste0(top_nmf_monocytes$ID, "_monocytes")
)
renamed_monocyte_programs

# Read the labels from the column written by FindSubCluster() (name ends with a
# dot) by its exact name, and store the relabelled copy in the column without
# the trailing dot that later cells use. Reading by exact name avoids `$`
# partial matching and makes this cell safe to re-run.
scRNA_Myeloid@meta.data$sub.cluster_cM01..Monocyte <- fct_recode(
  scRNA_Myeloid@meta.data[["sub.cluster_cM01..Monocyte."]],
  !!!renamed_monocyte_programs
)
table(scRNA_Myeloid@meta.data$sub.cluster_cM01..Monocyte)

## heatmap of NMF program genes in subclustered monocyte populations

In [None]:
# Corner of the program table, then the number of unique entries across all
# programs and how many of them are genes of the reference object.
myeloid_nmf_programs[1:5, 1:5]
length(unique(as.vector(as.matrix(myeloid_nmf_programs))))
length(intersect(unique(as.vector(as.matrix(myeloid_nmf_programs))), rownames(scRNA_Myeloid)))

In [None]:
options(repr.plot.width = 25, repr.plot.height = 10)

# Matrix of logFC values: rows = monocyte sub-clusters, columns = NMF program
# genes found in the reference object. Only logFC > 0.25 is kept; every other
# cell of the matrix is filled with 0.
heatmapInput <- wilcox_Monocytes %>%
  filter(
    feature %in% intersect(unique(as.vector(as.matrix(myeloid_nmf_programs))), rownames(scRNA_Myeloid)),
    logFC > 0.25
  ) %>% # filter(abs(logFC) > 0.5) %>%
  pivot_wider(id_cols = group, names_from = feature, values_from = logFC, values_fill = 0) %>%
  as.data.frame() %>%
  column_to_rownames(var = "group") %>%
  as.matrix()

# Order the columns by NMF program (one column per TERM2GENE row that matches).
heatmapInput <- heatmapInput[, TERM2GENE %>%
  filter(gene %in% colnames(heatmapInput)) %>%
  arrange(term) %>%
  pull(gene)]

# Nine-step colour ramp spanning the observed logFC range.
col_fun <- colorRamp2(
  breaks = seq(min(heatmapInput), max(heatmapInput), length.out = 9),
  colors = c(
    "#f7fcfd", "#e0ecf4", "#bfd3e6",
    "#9ebcda", "#8c96c6", "#8c6bb1",
    "#88419d", "#810f7c", "#4d004b"
  )
)

# column_ha = HeatmapAnnotation(NMF = colnames(heatmapInput), col = list(NMF = geneColors[colnames(heatmapInput)]))

# Draw the heatmap inside grid.grabExpr() so it is captured as a grob.
monocyte_heatmap_unscaled <- grid.grabExpr(
  draw(
    ComplexHeatmap::Heatmap(
      heatmapInput,
      col = col_fun,
      cluster_columns = FALSE,
      # column_km = floor((ncol(heatmapInput)-1)/10),
      column_gap = unit(0.5, "cm"),
      # row_km = nrow(heatmapInput)-1,
      row_gap = unit(0.5, "cm"),
      width = unit(20, "in"),
      height = unit(3, "in"),
      border = TRUE,
      column_title = "Myeloid NMF program genes with logFC > 0.25 between monocyte subclusters. Colors = logFC",
      name = "logFC",
      heatmap_legend_param = list(direction = "horizontal")
    ),
    heatmap_legend_side = "top"
  )
)
grid.draw(monocyte_heatmap_unscaled)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 3)
ht_list <- NULL

# One small heatmap per NMF program, restricted to that program's genes that
# are columns of heatmapInput. Programs with fewer than two such genes yield
# NULL and are dropped before plotting.
# seq_len() keeps the iteration empty if there are no program columns.
ht_list <- map(seq_len(ncol(myeloid_nmf_programs)), function(i) {
  program_genes <- myeloid_nmf_programs[, i] %>% as.matrix() %>% as.vector() %>% na.omit()
  program_genes <- intersect(colnames(heatmapInput), program_genes)
  if (length(program_genes) <= 1) {
    return(NULL)
  }
  # Draw inside grid.grabExpr() so the heatmap is captured as a grob.
  grid.grabExpr(
    draw(
      ComplexHeatmap::Heatmap(
        heatmapInput[, program_genes],
        col = col_fun,
        cluster_rows = FALSE,
        show_column_dend = FALSE,
        column_title = colnames(myeloid_nmf_programs)[i],
        column_title_gp = grid::gpar(fontsize = 18),
        name = "logFC",
        heatmap_legend_param = list(direction = "horizontal"),
        column_names_gp = grid::gpar(fontsize = 16),
        row_names_gp = grid::gpar(fontsize = 8)
      ),
      heatmap_legend_side = "top"
    )
  )
})

options(repr.plot.width = 24, repr.plot.height = 30)
wrap_plots(Filter(Negate(is.null), ht_list), ncol = 3)

In [None]:
options(repr.plot.width = 25, repr.plot.height = 10)

# Same logFC matrix as the unscaled heatmap (rows = monocyte sub-clusters,
# columns = NMF program genes, logFC > 0.25 kept, 0 elsewhere), then each
# column is centred and scaled with scale().
heatmapInput <- wilcox_Monocytes %>%
  filter(
    feature %in% intersect(unique(as.vector(as.matrix(myeloid_nmf_programs))), rownames(scRNA_Myeloid)),
    logFC > 0.25
  ) %>% # filter(abs(logFC) > 0.5) %>%
  pivot_wider(id_cols = group, names_from = feature, values_from = logFC, values_fill = 0) %>%
  as.data.frame() %>%
  column_to_rownames(var = "group") %>%
  as.matrix() %>%
  scale()

# Columns are split by k-means into floor((ncol - 1) / 10) groups; the drawing
# happens inside grid.grabExpr() so it is captured as a grob.
monocyte_heatmap_scaled <- grid.grabExpr(
  draw(
    ComplexHeatmap::Heatmap(
      heatmapInput,
      column_km = floor((ncol(heatmapInput) - 1) / 10),
      row_gap = unit(0.5, "cm"),
      width = unit(20, "in"),
      height = unit(3, "in"),
      border = TRUE,
      column_title = "Myeloid NMF program genes with logFC > 0.25 between monocyte subclusters. Colors = scaled logFC",
      name = "scaled logFC",
      heatmap_legend_param = list(direction = "horizontal")
    ),
    heatmap_legend_side = "top"
  )
)
grid.draw(monocyte_heatmap_scaled)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 3)
ht_list <- NULL
# Shorten row labels that contain 'Macrophage'.
rownames(heatmapInput) <- gsub(x = rownames(heatmapInput), pattern = 'Macrophage', replacement = 'Macro')

# One small heatmap per NMF program, restricted to that program's genes that
# are columns of the scaled heatmapInput. Programs with fewer than two such
# genes yield NULL and are dropped before plotting.
# seq_len() keeps the iteration empty if there are no program columns.
ht_list <- map(seq_len(ncol(myeloid_nmf_programs)), function(i) {
  program_genes <- myeloid_nmf_programs[, i] %>% as.matrix() %>% as.vector() %>% na.omit()
  program_genes <- intersect(colnames(heatmapInput), program_genes)
  if (length(program_genes) <= 1) {
    return(NULL)
  }
  # Draw inside grid.grabExpr() so the heatmap is captured as a grob.
  grid.grabExpr(
    draw(
      ComplexHeatmap::Heatmap(
        heatmapInput[, program_genes],
        cluster_rows = FALSE,
        show_column_dend = FALSE,
        column_title = colnames(myeloid_nmf_programs)[i],
        column_title_gp = grid::gpar(fontsize = 18),
        # The matrix was column-scaled, so label the legend accordingly
        # (matches the overview heatmap of the scaled values).
        name = "scaled logFC",
        heatmap_legend_param = list(direction = "horizontal"),
        column_names_gp = grid::gpar(fontsize = 16),
        row_names_gp = grid::gpar(fontsize = 8)
      ),
      heatmap_legend_side = "top"
    )
  )
})

options(repr.plot.width = 35, repr.plot.height = 40)
wrap_plots(Filter(Negate(is.null), ht_list), ncol = 3)

## Subcluster macrophage populations

In [None]:
#scRNA_Myeloid@meta.data$ClusterFull %>% table()
# Cell counts for the sub-cluster column written by FindSubCluster() (name ends
# with a dot) ...
scRNA_Myeloid@meta.data$sub.cluster_cM01..Monocyte. %>% table()
# ... and for the relabelled column, which has the same name without the
# trailing dot.
scRNA_Myeloid@meta.data$sub.cluster_cM01..Monocyte %>% table()

In [None]:
# The sub-cluster column name before it is passed through make.names().
paste("sub.cluster_", 'cM01 (Monocyte)', sep = "")

In [None]:
# Sub-cluster the macrophage-like fine type on the UMAP fuzzy graph.
cluster <- "cM02 (Macrophage-like)"

# Use the relabelled monocyte column as identities; cells outside the monocyte
# cluster are selected by FindSubCluster() through these identities.
Idents(scRNA_Myeloid) <- scRNA_Myeloid@meta.data$sub.cluster_cM01..Monocyte
unique(Idents(scRNA_Myeloid))

# The result is stored in a metadata column named after the cluster, passed
# through make.names() so it is a syntactically valid column name.
scRNA_Myeloid <- FindSubCluster(
  object = scRNA_Myeloid,
  cluster = cluster,
  graph.name = "humap_fgraph",
  subcluster.name = make.names(paste0("sub.cluster_", cluster)),
  resolution = 0.5,
  algorithm = 1
)

# Switch the active identities to the new sub-cluster labels.
Idents(scRNA_Myeloid) <- make.names(paste0("sub.cluster_", cluster))

In [None]:
# Cell counts per macrophage sub-cluster. The column written by FindSubCluster()
# ends with a dot; it is addressed by its exact name instead of relying on `$`
# partial matching.
table(scRNA_Myeloid@meta.data[["sub.cluster_cM02..Macrophage.like."]])

### Visualize clusters on UMAP

In [None]:
# Metadata column name under which the macrophage sub-cluster labels were stored.
make.names(paste("sub.cluster_", cluster, sep = ""))

In [None]:
# UMAP coordinates with the cell names as an explicit column for joining.
temp <- Embeddings(scRNA_Myeloid, 'humap') %>% as.data.frame()
temp$combined_cell_names <- rownames(temp)

# Attach annotations (join key spelled out) and keep macrophage-like cells only.
humap_embeddings <- right_join(
  scRNA_Myeloid@meta.data %>%
    select(combined_cell_names, ClusterTop, ClusterFull, biosample_id, sub.cluster_cM02..Macrophage.like.),
  temp,
  by = "combined_cell_names"
)
humap_embeddings <- humap_embeddings %>% filter(ClusterFull == "cM02 (Macrophage-like)")

# All macrophage sub-clusters on one panel.
p1 <- ggplot(humap_embeddings, aes(x = HUMAP_1, y = HUMAP_2, color = sub.cluster_cM02..Macrophage.like.)) +
  # alpha must lie in [0, 1]; the previous value of 2 is replaced by fully opaque.
  geom_point(shape = '.', alpha = 1) +
  ggtitle('UMAP - Subclustered Macrophages') +
  guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = 'top') +
  scale_color_tableau('Tableau 20')

# One facet per sub-cluster, each highlighting its own cells.
p2 <- p1 + facet_wrap(~sub.cluster_cM02..Macrophage.like.) + gghighlight()

options(repr.plot.width = 20, repr.plot.height = 10)

p1 + p2

### Find markers for macrophage subclusters

In [None]:
# Keep only the cells whose ClusterFull is 'cM02 (Macrophage-like)'.
scRNA_Macrophages = subset(scRNA_Myeloid, subset = ClusterFull == 'cM02 (Macrophage-like)')

In [None]:
require(presto)

# Wilcoxon / AUC statistics per gene for the object's active identities.
wilcox_Macrophages <- wilcoxauc(scRNA_Macrophages)

# All markers with AUC >= 0.55 and adjusted p <= 0.05, one column per group;
# the first column of top_markers() output is dropped.
topMarkers_Macrophages <- top_markers(
  wilcox_Macrophages,
  n = Inf,
  auc_min = 0.55,
  padj_max = 0.05
)[, -1]
topMarkers_Macrophages

### Enrich NMF programs in subcluster markers using clusterProfiler::enricher (despite the name, this is an implementation of the hypergeometric test)

In [None]:
# Long-format (term, gene) table for enricher(): one row per program / gene pair.
TERM2GENE <- myeloid_nmf_programs %>%
  pivot_longer(cols = everything(), names_to = "term", values_to = "gene")
# Program columns of unequal length are padded with NA; drop those rows so the
# gene sets contain real genes only (same treatment as in the monocyte section).
TERM2GENE <- na.omit(TERM2GENE)
head(TERM2GENE)

In [None]:
# Run enricher() on each sub-cluster's marker column against the NMF program
# gene sets, then stack the per-sub-cluster results into one tibble keyed by
# `macrophage_subcluster`.
enrichmentList_Macrophages <- colnames(topMarkers_Macrophages) %>%
  set_names() %>%
  map(function(subcluster) {
    marker_genes <- topMarkers_Macrophages[, subcluster] %>%
      as.matrix() %>%
      as.vector() %>%
      na.omit()
    as.data.frame(enricher(marker_genes, TERM2GENE = TERM2GENE))
  }) %>%
  bind_rows(.id = "macrophage_subcluster") %>%
  as_tibble()
enrichmentList_Macrophages

In [None]:
# Save the macrophage enrichment table to the working directory.
write.csv(enrichmentList_Macrophages, 'enrichmentList_Macrophages.csv')

### Relabel subclusters with top NMF programs

In [None]:
# Heatmap of -log10(adjusted p) with NMF programs as rows and macrophage
# sub-clusters as columns; program / sub-cluster pairs without an enrichment
# result are filled with 0.
enrichmentList_Macrophages %>%
  transmute(ID, macrophage_subcluster, log10_padj = -1 * log10(p.adjust)) %>%
  pivot_wider(
    names_from = macrophage_subcluster,
    values_from = log10_padj,
    values_fill = 0
  ) %>%
  column_to_rownames(var = "ID") %>%
  as.matrix() %>%
  ComplexHeatmap::Heatmap(col = RColorBrewer::brewer.pal(n = 9, name = "Blues"))

In [None]:
# Exactly one program per sub-cluster: the one with the smallest adjusted
# p-value. with_ties = FALSE guarantees a single row per sub-cluster, so a
# sub-cluster can never be mapped to two different new labels.
top_nmf_macrophages <- enrichmentList_Macrophages %>%
  group_by(macrophage_subcluster) %>%
  slice_min(p.adjust, n = 1, with_ties = FALSE) %>%
  ungroup()
top_nmf_macrophages %>% head

# fct_recode() expects new_label = old_label pairs.
renamed_macrophage_programs <- setNames(
  top_nmf_macrophages$macrophage_subcluster,
  paste0(top_nmf_macrophages$ID, "_macrophage")
)
renamed_macrophage_programs

# Read the labels from the column written by FindSubCluster() (name ends with a
# dot) by its exact name, and store the relabelled copy in the column without
# the trailing dot. Reading by exact name avoids `$` partial matching and makes
# this cell safe to re-run.
scRNA_Myeloid@meta.data$sub.cluster_cM02..Macrophage.like <- fct_recode(
  scRNA_Myeloid@meta.data[["sub.cluster_cM02..Macrophage.like."]],
  !!!renamed_macrophage_programs
)

## heatmap of NMF program genes in subclustered macrophage populations

In [None]:
# Corner of the program table, then the number of unique entries across all
# programs and how many of them are genes of the reference object.
myeloid_nmf_programs[1:5, 1:5]
length(unique(as.vector(as.matrix(myeloid_nmf_programs))))
length(intersect(unique(as.vector(as.matrix(myeloid_nmf_programs))), rownames(scRNA_Myeloid)))

In [None]:
options(repr.plot.width = 25, repr.plot.height = 10)

# Matrix of logFC values: rows = macrophage sub-clusters, columns = NMF program
# genes found in the reference object. Only logFC > 0.25 is kept; every other
# cell of the matrix is filled with 0.
heatmapInput <- wilcox_Macrophages %>%
  filter(
    feature %in% intersect(unique(as.vector(as.matrix(myeloid_nmf_programs))), rownames(scRNA_Myeloid)),
    logFC > 0.25
  ) %>% # filter(abs(logFC) > 0.5) %>%
  pivot_wider(id_cols = group, names_from = feature, values_from = logFC, values_fill = 0) %>%
  as.data.frame() %>%
  column_to_rownames(var = "group") %>%
  as.matrix()

# Nine-step colour ramp spanning the observed logFC range.
col_fun <- colorRamp2(
  breaks = seq(min(heatmapInput), max(heatmapInput), length.out = 9),
  colors = c(
    "#f7fcfd", "#e0ecf4", "#bfd3e6",
    "#9ebcda", "#8c96c6", "#8c6bb1",
    "#88419d", "#810f7c", "#4d004b"
  )
)

# Columns are split by k-means into floor((ncol - 1) / 10) groups and rows into
# nrow - 1 groups; the drawing happens inside grid.grabExpr() so it is captured
# as a grob.
macrophage_heatmap_unscaled <- grid.grabExpr(
  draw(
    ComplexHeatmap::Heatmap(
      heatmapInput,
      col = col_fun,
      column_km = floor((ncol(heatmapInput) - 1) / 10),
      column_gap = unit(0.5, "cm"),
      row_km = nrow(heatmapInput) - 1,
      width = unit(20, "in"),
      height = unit(3, "in"),
      row_gap = unit(0.5, "cm"),
      border = TRUE,
      column_title = "Myeloid NMF program genes with logFC > 0.25 between macrophage subclusters. Colors = logFC",
      name = "logFC",
      heatmap_legend_param = list(direction = "horizontal")
    ),
    heatmap_legend_side = "top"
  )
)
grid.draw(macrophage_heatmap_unscaled)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 3)
ht_list <- NULL
# Shorten row labels that contain 'Macrophage'.
rownames(heatmapInput) <- gsub(x = rownames(heatmapInput), pattern = 'Macrophage', replacement = 'Macro')

# One small heatmap per NMF program, restricted to that program's genes that
# are columns of heatmapInput. Programs with fewer than two such genes yield
# NULL and are dropped before plotting.
# seq_len() keeps the iteration empty if there are no program columns.
ht_list <- map(seq_len(ncol(myeloid_nmf_programs)), function(i) {
  program_genes <- myeloid_nmf_programs[, i] %>% as.matrix() %>% as.vector() %>% na.omit()
  program_genes <- intersect(colnames(heatmapInput), program_genes)
  if (length(program_genes) <= 1) {
    return(NULL)
  }
  # Draw inside grid.grabExpr() so the heatmap is captured as a grob.
  grid.grabExpr(
    draw(
      ComplexHeatmap::Heatmap(
        heatmapInput[, program_genes],
        col = col_fun,
        cluster_rows = FALSE,
        show_column_dend = FALSE,
        column_title = colnames(myeloid_nmf_programs)[i],
        column_title_gp = grid::gpar(fontsize = 18),
        name = "logFC",
        heatmap_legend_param = list(direction = "horizontal"),
        column_names_gp = grid::gpar(fontsize = 16),
        row_names_gp = grid::gpar(fontsize = 8)
      ),
      heatmap_legend_side = "top"
    )
  )
})

options(repr.plot.width = 35, repr.plot.height = 40)
wrap_plots(Filter(Negate(is.null), ht_list), ncol = 3)

In [None]:
options(repr.plot.width = 25, repr.plot.height = 6)

# Same logFC matrix as the unscaled heatmap (rows = macrophage sub-clusters,
# columns = NMF program genes, logFC > 0.25 kept, 0 elsewhere), then each
# column is centred and scaled with scale().
heatmapInput <- wilcox_Macrophages %>%
  filter(
    feature %in% intersect(unique(as.vector(as.matrix(myeloid_nmf_programs))), rownames(scRNA_Myeloid)),
    logFC > 0.25
  ) %>% # filter(abs(logFC) > 0.5) %>%
  pivot_wider(id_cols = group, names_from = feature, values_from = logFC, values_fill = 0) %>%
  as.data.frame() %>%
  column_to_rownames(var = "group") %>%
  as.matrix() %>%
  scale()

# Columns are split by k-means into floor((ncol - 1) / 10) groups; the drawing
# happens inside grid.grabExpr() so it is captured as a grob.
macrophage_heatmap_scaled <- grid.grabExpr(
  draw(
    ComplexHeatmap::Heatmap(
      heatmapInput,
      column_km = floor((ncol(heatmapInput) - 1) / 10),
      # column_gap = unit(0.5, 'cm'),
      # row_km = nrow(heatmapInput)-1,
      row_gap = unit(0.5, "cm"),
      width = unit(20, "in"),
      height = unit(3, "in"),
      border = TRUE,
      column_title = "Myeloid NMF program genes with logFC > 0.25 between macrophage subclusters. Colors = scaled logFC",
      name = "scaled logFC",
      heatmap_legend_param = list(direction = "horizontal")
    ),
    heatmap_legend_side = "top"
  )
)
grid.draw(macrophage_heatmap_scaled)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 3)
ht_list <- NULL
# Shorten row labels that contain 'Macrophage'.
rownames(heatmapInput) <- gsub(x = rownames(heatmapInput), pattern = 'Macrophage', replacement = 'Macro')

# One small heatmap per NMF program, restricted to that program's genes that
# are columns of the scaled heatmapInput. Programs with fewer than two such
# genes yield NULL and are dropped before plotting.
# seq_len() keeps the iteration empty if there are no program columns.
ht_list <- map(seq_len(ncol(myeloid_nmf_programs)), function(i) {
  program_genes <- myeloid_nmf_programs[, i] %>% as.matrix() %>% as.vector() %>% na.omit()
  program_genes <- intersect(colnames(heatmapInput), program_genes)
  if (length(program_genes) <= 1) {
    return(NULL)
  }
  # Draw inside grid.grabExpr() so the heatmap is captured as a grob.
  grid.grabExpr(
    draw(
      ComplexHeatmap::Heatmap(
        heatmapInput[, program_genes],
        cluster_rows = FALSE,
        show_column_dend = FALSE,
        column_title = colnames(myeloid_nmf_programs)[i],
        column_title_gp = grid::gpar(fontsize = 18),
        # The matrix was column-scaled, so label the legend accordingly
        # (matches the overview heatmap of the scaled values).
        name = "scaled logFC",
        heatmap_legend_param = list(direction = "horizontal"),
        column_names_gp = grid::gpar(fontsize = 16),
        row_names_gp = grid::gpar(fontsize = 8)
      ),
      heatmap_legend_side = "top"
    )
  )
})

options(repr.plot.width = 35, repr.plot.height = 40)
wrap_plots(Filter(Negate(is.null), ht_list), ncol = 3)

## Healthy vs tumor monocyte subclusters

In [None]:
# Metadata columns available on the monocyte subset.
colnames(scRNA_Monocytes@meta.data)

In [None]:
# Cells per biosample for every monocyte sub-cluster, comparing Healthy
# (MMRStatus is NA) against Tumor (any non-NA MMRStatus) on a log10 axis.
options(repr.plot.width = 20, repr.plot.height = 10)

scRNA_Monocytes@meta.data %>%
  mutate(MMRStatus = ifelse(is.na(MMRStatus), "Healthy", "Tumor")) %>%
  group_by(sub.cluster_cM01..Monocyte., MMRStatus, biosample_id) %>%
  summarize(n = n()) %>%
  ggboxplot(
    x = "MMRStatus", y = "n",
    color = "MMRStatus", shape = "MMRStatus",
    palette = c("#00AFBB", "#E7B800", "#FC4E07"),
    add = "jitter"
  ) +
  scale_y_continuous(trans = "log10") +
  facet_wrap(~sub.cluster_cM01..Monocyte., scales = "free_x") +
  # Comparison across groups with stat_compare_means()'s default method.
  stat_compare_means() +
  # t-test of each group against the Healthy reference, shown as p.signif labels.
  stat_compare_means(aes(label = after_stat(p.signif)), method = "t.test", ref.group = "Healthy")

In [None]:
# Cells per biosample for every monocyte sub-cluster, split by MMRStatus;
# samples with a missing MMRStatus are labelled "Healthy".
options(repr.plot.width = 20, repr.plot.height = 10)

scRNA_Monocytes@meta.data %>%
  # Vectorised NA replacement. as.character() keeps the original labels even
  # when MMRStatus is stored as a factor (element-wise unlist() of mixed
  # factor/character values would fall back to the integer codes).
  mutate(MMRStatus = coalesce(as.character(MMRStatus), "Healthy")) %>%
  group_by(sub.cluster_cM01..Monocyte., MMRStatus, biosample_id) %>%
  summarize(n = n()) %>%
  ggboxplot(
    x = "MMRStatus", y = "n",
    color = "MMRStatus", shape = "MMRStatus",
    palette = c("#00AFBB", "#E7B800", "#FC4E07"),
    add = "jitter"
  ) +
  scale_y_continuous(trans = "log10") +
  facet_wrap(~sub.cluster_cM01..Monocyte., scales = "free_x") +
  # Comparison across groups with stat_compare_means()'s default method.
  stat_compare_means() +
  # t-test of each group against the Healthy reference, shown as p.signif labels.
  stat_compare_means(aes(label = after_stat(p.signif)), method = "t.test", ref.group = "Healthy")

## Healthy vs tumor macrophage subclusters

In [None]:
# Metadata columns available on the macrophage subset.
colnames(scRNA_Macrophages@meta.data)

In [None]:
# Cells per biosample for every macrophage sub-cluster, comparing Healthy
# (MMRStatus is NA) against Tumor (any non-NA MMRStatus) on a log10 axis.
options(repr.plot.width = 20, repr.plot.height = 10)

scRNA_Macrophages@meta.data %>%
  mutate(MMRStatus = ifelse(is.na(MMRStatus), "Healthy", "Tumor")) %>%
  group_by(sub.cluster_cM02..Macrophage.like., MMRStatus, biosample_id) %>%
  summarize(n = n()) %>%
  ggboxplot(
    x = "MMRStatus", y = "n",
    color = "MMRStatus", shape = "MMRStatus",
    palette = c("#00AFBB", "#E7B800", "#FC4E07"),
    add = "jitter"
  ) +
  scale_y_continuous(trans = "log10") +
  facet_wrap(~sub.cluster_cM02..Macrophage.like., scales = "free_x") +
  # Comparison across groups with stat_compare_means()'s default method.
  stat_compare_means() +
  # t-test of each group against the Healthy reference, shown as p.signif labels.
  stat_compare_means(aes(label = after_stat(p.signif)), method = "t.test", ref.group = "Healthy")

In [None]:
# Cells per biosample for every macrophage sub-cluster, split by MMRStatus;
# samples with a missing MMRStatus are labelled "Healthy".
options(repr.plot.width = 20, repr.plot.height = 10)

scRNA_Macrophages@meta.data %>%
  # Vectorised NA replacement. as.character() keeps the original labels even
  # when MMRStatus is stored as a factor (element-wise unlist() of mixed
  # factor/character values would fall back to the integer codes).
  mutate(MMRStatus = coalesce(as.character(MMRStatus), "Healthy")) %>%
  group_by(sub.cluster_cM02..Macrophage.like., MMRStatus, biosample_id) %>%
  summarize(n = n()) %>%
  ggboxplot(
    x = "MMRStatus", y = "n",
    color = "MMRStatus", shape = "MMRStatus",
    palette = c("#00AFBB", "#E7B800", "#FC4E07"),
    add = "jitter"
  ) +
  scale_y_continuous(trans = "log10") +
  facet_wrap(~sub.cluster_cM02..Macrophage.like., scales = "free_x") +
  # Comparison across groups with stat_compare_means()'s default method.
  stat_compare_means() +
  # t-test of each group against the Healthy reference, shown as p.signif labels.
  stat_compare_means(aes(label = after_stat(p.signif)), method = "t.test", ref.group = "Healthy")

## Proliferating clusters?

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10, repr.plot.res = 400)

# Two equally wide panels side by side.
layout <- "AABB"

# Left: UMAP coloured by `cleaned_fine_types`. Right: MKI67 expression on the
# same embedding.
patchwork::wrap_plots(
  DimPlot(
    scRNA_Myeloid,
    reduction = "humap",
    group.by = "cleaned_fine_types",
    label = FALSE,
    label.box = FALSE,
    repel = TRUE
  ) +
    scale_color_tableau("Tableau 20") +
    scale_fill_tableau("Tableau 20") +
    ggtitle("Myeloid Clusters") +
    theme(legend.position = "right"),
  FeaturePlot(scRNA_Myeloid, features = "MKI67", reduction = "humap") +
    ggtitle(str_wrap("MKI67 in all myeloid cells.", width = 50))
) + plot_layout(design = layout)

In [None]:
# Dimensions of the reference object.
dim(scRNA_Myeloid)

In [None]:
# Save the full macrophage sub-cluster marker statistics.
write.csv(wilcox_Macrophages, 'wilcox_macrophages.csv')

In [None]:
# Save the full monocyte sub-cluster marker statistics.
write.csv(wilcox_Monocytes, 'wilcox_monocytes.csv')

## Justify new labels
- Once we have labels, we want to justify them to collaborators
- DGE analysis in original labels (post-QC cells) 
- DGE analysis in new labels 
- Correlate logFC in matched clusters

### DGE analysis in original labels (post-QC cells) 

In [None]:
# Marker statistics for the original Pelka labels (ClusterFull).
clusterFull_markers <- presto::wilcoxauc(scRNA_Myeloid, 'ClusterFull')

# The adjusted p-value cut-off is passed under its full argument name,
# `padj_max`, instead of relying on partial matching of `padj`.
top_markers(
  clusterFull_markers,
  n = Inf,
  padj_max = 0.05,
  auc_min = 0.6
)

### DGE analysis in new labels 

In [None]:
# Marker statistics for the new labels (cleaned_fine_types).
cleaned_fine_types_markers <- presto::wilcoxauc(scRNA_Myeloid, 'cleaned_fine_types')

# The adjusted p-value cut-off is passed under its full argument name,
# `padj_max`, instead of relying on partial matching of `padj`.
top_markers(
  cleaned_fine_types_markers,
  n = Inf,
  padj_max = 0.05,
  auc_min = 0.6
)

### Correlate logFC in matched clusters

In [None]:
# Keep only the original-label groups that also exist among the new labels,
# and tag each table with the labelling it came from.
clusterFull_markers <- clusterFull_markers %>%
  filter(group %in% cleaned_fine_types_markers$group) %>%
  mutate(Comparison = "ClusterFull")
cleaned_fine_types_markers <- cleaned_fine_types_markers %>%
  mutate(Comparison = "cleaned_fine_types")

In [None]:
# One row per (feature, group) with the logFC under both labellings.
comparison_markers <- dplyr::left_join(
  x = clusterFull_markers %>% select(feature, group, logFC),
  y = cleaned_fine_types_markers %>% select(feature, group, logFC),
  by = join_by(feature, group)
)
colnames(comparison_markers) <- c('feature', 'group', 'clusterFull', 'cleaned_fine_types')

# Global linear fit of the new-label logFC on the original-label logFC.
getColors <- coef(lm(cleaned_fine_types ~ clusterFull, data = comparison_markers))
getIntercept <- as.numeric(getColors)[1]
getSlope <- as.numeric(getColors)[2]

comparison_markers <- comparison_markers %>%
  mutate(
    # Fitted value on the regression line. The model is
    # cleaned_fine_types ~ clusterFull, so the prediction must be computed from
    # the predictor `clusterFull` (it was previously computed from the response).
    ye = getIntercept + (getSlope * clusterFull),
    # TRUE when the observed new-label logFC lies below the fitted line.
    color = cleaned_fine_types < ye
  ) %>%
  # NOTE(review): with the 0.25 quantile and `|`, most features are flagged for
  # labelling -- confirm this is the intended threshold.
  mutate(
    labelTRUE = (cleaned_fine_types > quantile(cleaned_fine_types, 0.25, na.rm = TRUE)) |
      (clusterFull > quantile(clusterFull, 0.25, na.rm = TRUE))
  ) %>%
  mutate(label = if_else(labelTRUE, feature, NA))
rbind(head(comparison_markers), tail(comparison_markers))

In [None]:
options(repr.plot.width = 40, repr.plot.height = 30)

# logFC under the Pelka labels (x) against logFC under the cleaned labels (y),
# one facet per group, with axes through zero and a per-facet linear fit.
# NOTE(review): every point is labelled with its feature here; the `label`
# column of comparison_markers is not used -- confirm this is intended.
ggplot(
  data = comparison_markers,
  mapping = aes(x = clusterFull, y = cleaned_fine_types)
) +
  geom_point(shape = ".", alpha = 0.5) +
  facet_wrap(~group) +
  theme_pubr(base_size = 20) +
  xlab("Pelka cell types") +
  ylab("Cleaned fine types") +
  geom_hline(aes(yintercept = 0)) +
  geom_vline(aes(xintercept = 0)) +
  ggrepel::geom_label_repel(aes(label = feature, color = color)) +
  geom_smooth(method = "lm", se = FALSE, linetype = 1, alpha = 0.15, linewidth = 0.1)

### Pearson correlations

In [None]:
# Correlation (cor() default) between the two logFC vectors, per group.
cellTypeCor <- comparison_markers %>%
  group_by(group) %>%
  dplyr::summarize(correlation = cor(clusterFull, cleaned_fine_types))
cellTypeCor

## Visualize final clusters in UMAP space, and also the original labels (after merge)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 20)

# UMAP coordinates joined with the cleaned and the original labels of each cell.
humap_embeddings <- left_join(
  Embeddings(scRNA_Myeloid, "humap") %>%
    as.data.frame() %>%
    mutate(combined_cell_names = rownames(.)),
  scRNA_Myeloid@meta.data %>%
    select(combined_cell_names, cleaned_fine_types, ClusterFull)
)

# First and last rows as a quick check of the join.
rbind(head(humap_embeddings), tail(humap_embeddings))

#### Cleaned clusters

In [None]:
options(repr.plot.width = 21, repr.plot.height = 10)

# Every label gets the same colour; gghighlight() then highlights each facet's
# own cells in that colour.
constantPalette <- setNames(
  rep("red", length(unique(humap_embeddings$cleaned_fine_types))),
  unique(humap_embeddings$cleaned_fine_types)
)

ggplot(
  data = humap_embeddings,
  mapping = aes(x = HUMAP_1, y = HUMAP_2, color = cleaned_fine_types)
) +
  geom_point(shape = ".", alpha = 0.5) +
  ggpubr::theme_pubr(base_size = 20) +
  theme(legend.position = "none") +
  scale_color_manual(values = constantPalette) +
  facet_wrap(~cleaned_fine_types) +
  gghighlight::gghighlight() +
  ggtitle("Cleaned fine types - Myeloid cells")

In [None]:
# Marker statistics with the cleaned labels as the active identities.
Idents(scRNA_Myeloid) <- "cleaned_fine_types"
wilcox_cleaned_fine_types <- wilcoxauc(scRNA_Myeloid)
head(wilcox_cleaned_fine_types)

# Top markers with AUC >= 0.7, transposed for display.
t(top_markers(wilcox_cleaned_fine_types, auc_min = 0.7))

#### Pelka clusters

In [None]:
options(repr.plot.width = 21, repr.plot.height = 7)

# Every label gets the same colour; gghighlight() then highlights each facet's
# own cells in that colour.
constantPalette <- setNames(
  rep("red", length(unique(humap_embeddings$ClusterFull))),
  unique(humap_embeddings$ClusterFull)
)

ggplot(
  data = humap_embeddings,
  mapping = aes(x = HUMAP_1, y = HUMAP_2, color = ClusterFull)
) +
  geom_point(shape = ".", alpha = 0.5) +
  ggpubr::theme_pubr(base_size = 20) +
  theme(legend.position = "none") +
  scale_color_manual(values = constantPalette) +
  facet_wrap(~ClusterFull) +
  gghighlight::gghighlight() +
  ggtitle("Pelka fine types - Myeloid cells")

## GLMM to find DE genes between clusters

In [None]:
# Packages and session options for the GLMM section. Each of these packages is
# also requested in the notebook's first cell.
require(tidyverse)
require(Seurat)
require(data.table)
require(lme4)
require(presto)
require(singlecellmethods)
require(future)
require(furrr)
require(gghighlight)
# Size limit for globals exported by the future framework: 1000 * 1024^2 bytes.
options(future.globals.maxSize = 1000 * 1024 ^2)
# Fix the random seed before model fitting.
set.seed(1)

In [None]:
# Pseudobulk: collapse counts per (biosample_id, cleaned_fine_types)
# combination, requiring at least 3 cells per group.
pb <- presto::collapse_counts(
  GetAssayData(scRNA_Myeloid, "counts"),
  scRNA_Myeloid@meta.data,
  c("biosample_id", "cleaned_fine_types"),
  min_cells_per_group = 3
)
head(pb$meta_data)
dim(pb$counts)

# Sanity checks on the collapsed metadata and count matrix.
colnames(scRNA_Myeloid@meta.data)
dim(pb$meta_data)
head(pb$meta_data)
dim(pb$counts_mat)

In [None]:
# Reorder the normalised pseudobulk matrix to the rows and columns of the
# pseudobulk count matrix.
# NOTE(review): assumes `pb` has an `exprs_norm` element -- confirm that
# collapse_counts() returned one with the arguments used above.
pb$exprs_norm = pb$exprs_norm[rownames(pb$counts_mat), colnames(pb$counts_mat)]
dim(pb$exprs_norm)
pb$exprs_norm[1:5, 1:5]

In [None]:
# Fit the Poisson mixed model per gene on the pseudobulk counts and time the
# fit. Random intercepts: cell type, cell type within biosample, and biosample;
# logUMI enters as an offset. Warnings raised during fitting are suppressed.
system.time({
  suppressWarnings({
    presto_res <- presto::presto.presto(
      y ~ 1 + (1 | cleaned_fine_types) + (1 | cleaned_fine_types:biosample_id) + (1 | biosample_id) + offset(logUMI),
      pb$meta_data,
      pb$counts_mat,
      size_varname = "logUMI",
      effects_cov = "cleaned_fine_types",
      ncore = 1,
      min_sigma = 0.05,
      family = "poisson",
      nsim = 1000
    )
  })
})

# Cache the fitted model in the working directory.
readr::write_rds(presto_res, "Myeloid_fineTyping_GLMM.rds")

In [None]:
# Reload the cached model fit from the working directory.
presto_res = readr::read_rds('Myeloid_fineTyping_GLMM.rds')

### Make contrasts

In [None]:
# Contrast matrix over the levels of cleaned_fine_types.
contrasts_mat <- make_contrast.presto(presto_res, var_contrast = "cleaned_fine_types")
contrasts_mat

### Find marginal effects

In [None]:
# One-tailed contrast statistics per gene and cluster, sorted by p-value.
effects_marginal <- contrasts.presto(presto_res, contrasts_mat, one_tailed = TRUE) %>%
  dplyr::mutate(
    cluster = contrast,
    # Convert the natural-log scale estimates to log2 for interpretability.
    logFC = sign(beta) * log2(exp(abs(beta))),
    SD = log2(exp(sigma)),
    zscore = logFC / SD
  ) %>%
  arrange(pvalue)

# Benjamini-Hochberg FDR over all tests.
effects_marginal[["fdr"]] <- p.adjust(effects_marginal[["pvalue"]], method = "BH")

# FDR values of exactly 0 are replaced by the smallest non-zero FDR so that the
# -log10 transform below stays finite.
effects_marginal[["corr_fdr"]] <- effects_marginal[["fdr"]]
effects_marginal[["corr_fdr"]][effects_marginal[["fdr"]] == 0] <-
  min(effects_marginal[["fdr"]][effects_marginal[["fdr"]] != 0])
effects_marginal[["-log10_fdr"]] <- (-1) * log10(effects_marginal[["corr_fdr"]])

dim(effects_marginal)
head(effects_marginal)


In [None]:
# Per-gene mean of the `data` matrix across all myeloid cells.
meanExp <- rowMeans(GetAssayData(scRNA_Myeloid, 'data'))
meanExp <- data.frame(feature = names(meanExp), meanExp = meanExp)

# For every cluster, add a column holding the fraction of that cluster's cells
# in which each gene has zero counts.
# The count matrix is fetched once, outside the loop.
counts_mat <- GetAssayData(scRNA_Myeloid, 'counts')
for (cluster in unique(effects_marginal$cluster)) {
    print(cluster)
    # Cells of this cluster, identified by their names (metadata row names)
    # rather than through a sample-level metadata column. which() ignores cells
    # with a missing label.
    cluster_cells <- rownames(scRNA_Myeloid@meta.data)[
        which(scRNA_Myeloid@meta.data$cleaned_fine_types == cluster)
    ]
    # `> 0` avoids converting the count matrix to a data frame; the fraction of
    # zeros is one minus the fraction of cells with a non-zero count.
    detected <- counts_mat[, cluster_cells, drop = FALSE] > 0
    # Values are assigned by position, i.e. this assumes the `counts` and `data`
    # matrices list the genes in the same order.
    meanExp[, cluster] <- 1 - Matrix::rowMeans(detected)
}
head(meanExp)

#effects_marginal = effects_marginal %>% left_join(meanExp, by = join_by(feature), relationship = 'many-to-one') 
#tail(effects_marginal)

In [None]:
# Reshape the per-cluster zero fractions to long format (one row per
# gene x cluster), turn them into the proportion of cells expressing the gene,
# and attach both per-gene summaries to the marginal effects.
temp <- meanExp %>%
    pivot_longer(
        cols = unique(effects_marginal$cluster),
        names_to = 'contrast',
        values_to = 'prop.expressed'
    ) %>%
    mutate(prop.expressed = 1 - prop.expressed)
effects_marginal <- effects_marginal %>%
    left_join(temp, by = join_by(feature, contrast))
rm(temp)
head(effects_marginal)

# Volcano plot

In [None]:
# Number of gene x cluster effects above each log2 fold-change threshold.
sum(effects_marginal$logFC > 2)
sum(effects_marginal$logFC > 0.5)
sum(effects_marginal$logFC > 0.1)
# Median log2 fold change and number of significant (FDR < 0.05) effects per cluster.
effects_marginal %>%
    filter(fdr < 0.05) %>%
    group_by(cluster) %>%
    summarize(medianLFC = median(logFC), n = n())

In [None]:
# Volcano plot, one panel per cluster: all effects as small dots, effects with
# FDR < 0.05 in blue, and effects with logFC > 2 in red with gene labels.
options(repr.plot.width = 15, repr.plot.height = 20, repr.plot.res = 200)
require(ggrepel)
ggplot(data = effects_marginal, mapping = aes(x = logFC, y = `-log10_fdr`)) +
    geom_point(shape = '.') +
    geom_point(
        data = effects_marginal[effects_marginal$`-log10_fdr` > -log10(0.05), ],
        color = 'blue',
        shape = 16
    ) +
    geom_point(
        data = effects_marginal[effects_marginal$logFC > 2, ],
        color = 'red',
        shape = 16
    ) +
    facet_wrap(~cluster, scales = 'free_y', ncol = 3) +
    theme_bw(base_size = 20) +
    # reference lines at the significance and effect-size thresholds
    geom_hline(mapping = aes(yintercept = -log10(0.05)), color = 'lightgrey') +
    geom_vline(mapping = aes(xintercept = 2), color = 'lightgrey') +
    geom_label_repel(
        data = effects_marginal[effects_marginal$logFC > 2, ],
        mapping = aes(label = feature)
    )

In [None]:
#effects_marginal$`-log10_fdr`[effects_marginal$`-log10_fdr` > 50] = 50

## MA plot

In [None]:
# MA-style plot per cluster: logFC against the mean of normalized counts.
# Effects with logFC > 2 are drawn and labelled in red; genes with
# meanExp > 1 are drawn in blue and labelled.
ggplot(data = effects_marginal, mapping = aes(x = meanExp, y = logFC)) +
    geom_point(shape = '.') +
    geom_point(
        data = effects_marginal[effects_marginal$logFC > 2, ],
        color = 'red',
        shape = 16
    ) +
    geom_label_repel(
        data = effects_marginal[effects_marginal$logFC > 2, ],
        mapping = aes(label = feature)
    ) +
    geom_point(
        data = effects_marginal[effects_marginal$meanExp > 1, ],
        color = 'blue',
        shape = 16
    ) +
    geom_label_repel(
        data = effects_marginal[effects_marginal$meanExp > 1, ],
        mapping = aes(label = feature)
    ) +
    facet_wrap(~cluster) +
    theme_bw(base_size = 20) +
    xlab('Mean normalized expression')

In [None]:
# Distribution of per-gene mean expression, one panel per cluster, with a
# log10-scaled frequency axis.
ggplot(data = effects_marginal, mapping = aes(x = meanExp)) +
    geom_histogram() +
    facet_wrap(~cluster) +
    theme_bw(base_size = 20) +
    scale_y_continuous(trans = 'log10') +
    labs(x = 'Mean expression', y = 'Frequency')

In [None]:
# Compare the cluster labels present in the effects table with the original
# ClusterFull annotation of the reference object.
effects_marginal$cluster %>% unique()
unique(scRNA_Myeloid@meta.data$ClusterFull)

In [None]:
# logFC against the proportion of cells expressing each gene, per cluster.
# Red: logFC > 2 (labelled only when expressed in under half of the cells).
# Blue: expressed in more than half of the cells (always labelled).
options(repr.plot.width = 15, repr.plot.height = 20, repr.plot.res = 200)
ggplot(data = effects_marginal, mapping = aes(x = prop.expressed, y = logFC)) +
    geom_point(shape = '.') +
    geom_point(
        data = effects_marginal[effects_marginal$logFC > 2, ],
        color = 'red',
        shape = 16
    ) +
    geom_label_repel(
        data = effects_marginal[effects_marginal$logFC > 2 & effects_marginal$prop.expressed < 0.5, ],
        mapping = aes(label = feature),
        color = 'red'
    ) +
    geom_point(
        data = effects_marginal[effects_marginal$prop.expressed > 0.5, ],
        color = 'blue',
        shape = 16
    ) +
    geom_label_repel(
        data = effects_marginal[effects_marginal$prop.expressed > 0.5, ],
        mapping = aes(label = feature),
        color = 'blue'
    ) +
    facet_wrap(~cluster) +
    theme_bw(base_size = 20) +
    xlab('Proportions of cells expressing the gene')

In [None]:
# Export the annotated marginal effects table to the working directory.
write.csv(effects_marginal, file = 'effects_marginal_Myeloid.csv')

## Filters: minimum effect size and detection rate

In [None]:
# Preview the effects table before filtering.
head(effects_marginal)

In [None]:
# Keep effects with log2 fold change above 0.1 for genes detected in more than
# 5% of the cluster's cells.
effects_marginal2 <- effects_marginal %>%
    filter(logFC > 0.1, prop.expressed > 0.05)

In [None]:
# Dimensions of the unfiltered effects table.
# NOTE(review): this cell follows the creation of effects_marginal2; confirm
# whether dim(effects_marginal2) was intended.
dim(effects_marginal)

### Make a plot of cells lost after QC at every nGene cutoff

- x-axis: number of genes kept
- y-axis: number of cells kept

In [None]:
# Size of the reference object (features x cells) before gene selection.
dim(scRNA_Myeloid)

In [None]:
# Rank genes by their best (maximum) z-score across clusters, strongest first.
# One row per gene; ties keep the order in which genes first appear in
# effects_marginal2.
genes_ranked_by_max_zscore = effects_marginal2 %>%
        group_by(feature) %>%
        mutate(max_zscore = max(zscore)) %>%
        # Drop the per-gene grouping as soon as the maximum is computed, so the
        # result is a plain tibble rather than one carrying a group per gene
        # into every downstream verb.
        ungroup() %>%
        select(feature, max_zscore) %>%
        distinct() %>%
        arrange(desc(max_zscore))
dim(genes_ranked_by_max_zscore)
# Show both ends of the ranking.
rbind(head(genes_ranked_by_max_zscore), tail(genes_ranked_by_max_zscore))

In [None]:
# Number of ranked genes (rows) and columns in the ranking table.
dim(genes_ranked_by_max_zscore)

In [None]:
# Sweep the number of top-ranked marker genes kept and count how many cells
# would still pass QC (> 10 detected genes and > 30 counts) on that gene panel.
nGenes_max = min(nrow(genes_ranked_by_max_zscore), nrow(scRNA_Myeloid))
stopifnot(nGenes_max >= 1)
# Cutoffs run from 100 genes up to all ranked genes. The start is capped at
# nGenes_max so the sequence never counts downwards when fewer than 100 genes
# are ranked. unique() wraps the whole vector: listing the end point twice
# duplicated a name in nCells_left and left its last entry as NA.
nGenes_used = unique(c(seq(from = min(100, nGenes_max), to = nGenes_max), nGenes_max))
nGenes_used
nCells_left = rep(NA_integer_, length(nGenes_used))
names(nCells_left) = as.character(nGenes_used)
countMat = GetAssayData(scRNA_Myeloid, 'counts')
dim(countMat)

for(trial in nGenes_used){
    print(trial)
    selectedGenes = genes_ranked_by_max_zscore$feature[seq_len(trial)]
    print(length(selectedGenes %>% unique()))
    # drop = FALSE keeps a matrix even when a single gene is selected
    counts = countMat[selectedGenes, , drop = FALSE]
    # filter
    nFeatures = colSums(counts > 0)
    nCounts = colSums(counts)
    nCells_left[[as.character(trial)]] = sum(nFeatures > 10 & nCounts > 30)
    print(nCells_left[[as.character(trial)]])
}

options(repr.plot.width=9, repr.plot.height=9)
data.frame(
    nGenes_used = as.integer(nGenes_used),
    nCells_left = unname(nCells_left)
) %>%
ggplot() + geom_point(aes(x = nGenes_used, y=nCells_left)) + 
ggpubr::theme_pubr() + 
scale_y_continuous(trans='log10') + 
xlab('Number of genes used') + 
ylab('Number of cells left') + 
geom_segment(aes(xend=nGenes_used, 
                 x = nGenes_used, 
                 y=nCells_left), 
             yend=0, 
             linetype=3) + 
ggtitle('GLMM DEGs - Myeloid cells') 

In [None]:
# Display the ranked gene table.
genes_ranked_by_max_zscore

## Select 175 genes and cells with counts > 30 and features > 10

In [None]:
# Take the 175 top-ranked genes. head() returns fewer entries when the ranking
# is shorter, whereas indexing with 1:175 would pad the vector with NA.
selectedGenes = head(genes_ranked_by_max_zscore$feature, 175)
selectedGenes %>% length()
selectedGenes

In [None]:
# Restrict the reference object to the selected genes, keeping all cells.
# NOTE(review): the next cell filters on nFeature_RNA / nCount_RNA of this
# subset; presumably Seurat recomputes those columns when subsetting - confirm.
scRNA_Myeloid_selected_genes = scRNA_Myeloid[selectedGenes, ]

In [None]:
# Names of the cells that pass QC on the selected-gene object: more than 10
# detected features and more than 30 counts.
selectedCells <- scRNA_Myeloid_selected_genes@meta.data %>%
    filter(nFeature_RNA > 10, nCount_RNA > 30) %>%
    pull(combined_cell_names) %>%
    as.vector()
length(selectedCells)
# peek at both ends of the vector
c(head(selectedCells), tail(selectedCells))

In [None]:
# Final reference subset: selected genes x QC-passing cells, followed by the
# distribution of its per-cell QC metrics.
scRNA_Myeloid_selected_genes <- scRNA_Myeloid[selectedGenes, selectedCells]
summary(scRNA_Myeloid_selected_genes@meta.data[['nFeature_RNA']])
summary(scRNA_Myeloid_selected_genes@meta.data[['nCount_RNA']])

In [None]:
# Side-by-side histograms of the per-cell QC metrics of the selected-gene
# object, each with a log10-scaled count axis.
options(repr.plot.width = 14, repr.plot.height = 7)
(
    ggplot(data = scRNA_Myeloid_selected_genes@meta.data) +
        geom_histogram(mapping = aes(x = nFeature_RNA)) +
        ggtitle('nFeature_RNA') +
        scale_y_continuous(trans = 'log10')
) + (
    ggplot(data = scRNA_Myeloid_selected_genes@meta.data) +
        geom_histogram(mapping = aes(x = nCount_RNA)) +
        ggtitle('nCount_RNA') +
        scale_y_continuous(trans = 'log10')
)

## Cache

### Myeloid cells with all genes

In [None]:
# Save the full-gene myeloid object to the working directory.
# NOTE(review): the object was loaded from a file of the same name under an
# absolute path; this writes to a relative path - confirm the intended target.
readr::write_rds(scRNA_Myeloid, 'Myeloid_fineTyping_all_genes.rds')

### Myeloid cells with selected genes

In [None]:
# Save the selected-gene, QC-filtered myeloid object to the working directory.
readr::write_rds(scRNA_Myeloid_selected_genes, 'Myeloid_fineTyping_selected_genes.rds')

In [None]:
# Show the working directory, i.e. where the relative-path caches were written.
getwd()

## CXCL10 on a UMAP

In [None]:
# Tabulate the macrophage-like sub-clustering, then use it both as the active
# identity and as the `cleaned_fine_types` label.
table(scRNA_Myeloid@meta.data[['sub.cluster_cM02..Macrophage.like']])
scRNA_Myeloid <- SetIdent(
    scRNA_Myeloid,
    value = scRNA_Myeloid@meta.data[['sub.cluster_cM02..Macrophage.like']]
)
scRNA_Myeloid@meta.data[['cleaned_fine_types']] <-
    scRNA_Myeloid@meta.data[['sub.cluster_cM02..Macrophage.like']]

In [None]:
# Cell counts per cleaned fine type after the relabelling.
table(scRNA_Myeloid@meta.data$cleaned_fine_types)

In [None]:
# Composite CXCL10 figure. Panels, in order, fill areas A-J of the design:
#   A/B  all myeloid cells: clusters and CXCL10 expression
#   C/D  monocytes: subclusters and CXCL10 expression
#   E/F  monocyte heatmaps (scaled, unscaled)
#   G/H  macrophages: subclusters and CXCL10 expression
#   I/J  macrophage heatmaps (scaled, unscaled)
options(repr.plot.width = 25, repr.plot.height = 40, repr.plot.res = 400)
layout <- "AABB
            CCDD
            EEEE
            FFFF
            GGHH
            IIII
            JJJJ
            "
patchwork::wrap_plots(
    # --- all myeloid cells ---
    DimPlot(
        scRNA_Myeloid,
        reduction = 'humap',
        label = FALSE,
        label.box = FALSE,
        repel = TRUE,
        group.by = 'cleaned_fine_types'
    ) +
        scale_color_tableau('Tableau 20') +
        scale_fill_tableau('Tableau 20') +
        ggtitle('Myeloid Clusters') +
        theme(legend.position = 'right'),
    FeaturePlot(scRNA_Myeloid, features = 'CXCL10', reduction = 'humap') +
        ggtitle(str_wrap('CXCL10 in all myeloid cells. One population of macrophage and one population of monocyte cells appear to be CXCL10+', width = 50)),
    # --- monocytes ---
    DimPlot(
        scRNA_Monocytes,
        reduction = 'humap',
        label = TRUE,
        label.box = TRUE,
        repel = TRUE,
        group.by = 'cleaned_fine_types'
    ) +
        scale_color_tableau('Tableau 10') +
        scale_fill_tableau('Tableau 10') +
        ggtitle('Monocyte subclusters') +
        theme(legend.position = 'none'),
    FeaturePlot(scRNA_Monocytes, features = 'CXCL10', reduction = 'humap') +
        ggtitle('CXCL10 in monocytes. Subcluster 5 is CXCL10+'),
    monocyte_heatmap_scaled,
    monocyte_heatmap_unscaled,
    # --- macrophages ---
    DimPlot(
        scRNA_Macrophages,
        reduction = 'humap',
        label = TRUE,
        label.box = TRUE,
        repel = TRUE,
        group.by = 'cleaned_fine_types'
    ) +
        scale_color_tableau('Tableau 10') +
        scale_fill_tableau('Tableau 10') +
        ggtitle('Macrophages subclusters') +
        theme(legend.position = 'none'),
    FeaturePlot(scRNA_Macrophages, features = 'CXCL10', reduction = 'humap') +
        ggtitle('CXCL10 in macrophages. Subcluster 5 is CXCL10+'),
    macrophage_heatmap_scaled,
    macrophage_heatmap_unscaled
) + plot_layout(design = layout)