# merge coarse typed merfish samples and do a DE analysis with both weighted knn and geneformer labels to compare methods

In [None]:
require(Seurat)
require(tidyverse)
require(readxl)
require(patchwork)
require(sf)
require(ggpubr)
require(ggthemes)
require(harmony)
require(presto)
require(ComplexHeatmap)
require(circlize)
require(glue)
require(e1071) 
require(caTools) 
require(class) 
require(gghighlight)
set.seed(1)

# Load scRNA object - all genes

In [None]:
# Read the scRNA reference Seurat object from the cluster scratch directory.
# NOTE(review): judging by the file name this is presumably the Pelka dataset
# restricted to the MERFISH panel genes -- confirm.
completeReference = readr::read_rds('/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Pelka_reference_cleaning/pelka_dataset_with_merfish_genes.rds')

In [None]:
# Tag every reference cell with its technology.
completeReference@meta.data$technology = 'scRNA'

In [None]:
# Copy existing columns to the names used later in this notebook:
# cell names -> combined_cell_names, biosample_id -> orig.ident,
# clTopLevel -> ClusterTop. The final line displays the object dimensions.
completeReference@meta.data$combined_cell_names = colnames(completeReference)
completeReference@meta.data$orig.ident = completeReference@meta.data$biosample_id
completeReference@meta.data$ClusterTop = completeReference@meta.data$clTopLevel
dim(completeReference)

In [None]:
# Collect the cleaned fine-type label of every reference cell from the
# fine-typing objects found under the reference directory, export each table
# as CSV next to its input, and restrict the reference to the labelled cells.
reference_dir <- '/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Pelka_reference_cleaning'
fineTyping_inputs <- list.files(reference_dir, full.names = TRUE, recursive = TRUE)
# Escape the dot and anchor the extension so only real `.rds` inputs match.
fineTyping_inputs <- fineTyping_inputs[grepl('fineTyping_all_genes\\.rds$', fineTyping_inputs)]
fineTyping_inputs
# Fail here with a clear message rather than on the subsetting further down.
stopifnot(length(fineTyping_inputs) > 0)

# One (cellID, cleaned_fine_types) table per input; bound once after the loop
# instead of growing a data frame with rbind() on every iteration.
fine_type_tables <- lapply(fineTyping_inputs, function(path) {
  obj <- readr::read_rds(path)
  # Objects without a cleaned_fine_types column fall back to ClusterFull.
  if (!'cleaned_fine_types' %in% colnames(obj@meta.data)) {
    obj@meta.data$cleaned_fine_types <- obj@meta.data$ClusterFull
  }
  fine_types <- obj@meta.data %>% select(cellID, cleaned_fine_types)
  # Swap only the trailing extension; an unanchored '.rds' regex could rewrite
  # another part of the path.
  write.csv(x = fine_types, file = sub('\\.rds$', '.csv', path))
  fine_types
})
cleaned_fine_types <- do.call(rbind, fine_type_tables)

dim(cleaned_fine_types)
head(cleaned_fine_types)

completeReference <- completeReference[, cleaned_fine_types$cellID]
reference_meta <- left_join(cleaned_fine_types, completeReference@meta.data)
# The join result does not carry cell names as row names, and its row order
# follows cleaned_fine_types rather than the object. Re-align the rows to the
# object's columns and restore the row names before writing the slot back.
reference_meta <- reference_meta[match(colnames(completeReference), reference_meta$cellID), , drop = FALSE]
rownames(reference_meta) <- colnames(completeReference)
completeReference@meta.data <- reference_meta


In [None]:
# Show the first metadata rows transposed (one column per cell).
completeReference@meta.data %>% head %>% t

# Load merged and annotated object

In [None]:
# Read the merged, annotated MERFISH Seurat object.
mergedObj = readr::read_rds('/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/annotated_merged_merfish.rds')

In [None]:
# Tag every cell of the merged object with its technology.
mergedObj@meta.data$technology = 'MERFISH'

In [None]:
mergedObj

In [None]:
# Number of cells without a Geneformer coarse label, then the metadata and
# object dimensions before filtering.
sum(is.na(mergedObj@meta.data$geneformer_ClusterTop))
dim(mergedObj@meta.data)
dim(mergedObj)

In [None]:
# Keep only cells that have a Geneformer coarse label.
# NOTE(review): this indexes columns by the `cell_id` metadata column, so it
# assumes cell_id holds the object's cell names -- confirm.
mergedObj = mergedObj[,mergedObj@meta.data$cell_id[!is.na(mergedObj@meta.data$geneformer_ClusterTop)]]
mergedObj

In [None]:
# Side-by-side cell counts per coarse type for the two labelling methods.
# Both tables are joined on their shared `Var1` column (no `by` is given),
# which is then renamed to `Cluster`.
full_join(table(mergedObj@meta.data$geneformer_ClusterTop) %>% as.data.frame %>% rename('geneformer' = Freq),
table(mergedObj@meta.data$ClusterTop) %>% as.data.frame %>% rename('knn' = Freq)) %>% rename('Cluster' = Var1)

In [None]:
# The same counts as plain tables: KNN labels first, Geneformer labels second.
table(mergedObj@meta.data$ClusterTop)
table(mergedObj@meta.data$geneformer_ClusterTop)

## Confusion matrix showing reassignment of cell types

In [None]:
# Tabulate the sample of origin of cells lacking a Geneformer coarse label.
# Cells with a missing label were already removed from mergedObj earlier in
# this notebook, so this is expected to tabulate zero rows.
mergedObj@meta.data[is.na(mergedObj@meta.data$geneformer_ClusterTop),] %>%pull( orig.ident )%>% table()

dim(mergedObj)

In [None]:
options(repr.plot.res = 300, repr.plot.width = 7, repr.plot.height = 5)
# Cross-tabulate KNN labels (rows) against Geneformer labels (columns).
label_counts <- as.matrix(table(
  paste('KNN', mergedObj@meta.data$ClusterTop),
  paste('Geneformer', mergedObj@meta.data$geneformer_ClusterTop)
))
# Scale every Geneformer column by its largest count (values are relative to
# the column maximum, not to the column sum).
scaled_counts <- apply(label_counts, MARGIN = 2, FUN = function(counts) counts / max(counts))
# Transpose so Geneformer labels become the heatmap rows.
Heatmap(t(scaled_counts), name = 'Prop')

## UMAP

In [None]:
# Read the combined scRNA + MERFISH object from the working directory.
scRNA_and_merfish = read_rds('scRNA_and_merfish_Subset.rds')

In [None]:
# For scRNA cells, use the reference clTopLevel annotation as the KNN-side label.
scRNA_and_merfish@meta.data$ClusterTop[scRNA_and_merfish@meta.data$technology == 'scRNA'] = scRNA_and_merfish@meta.data$clTopLevel[scRNA_and_merfish@meta.data$technology == 'scRNA']

In [None]:
# For scRNA cells, use the same clTopLevel annotation as the Geneformer-side
# label, so both label columns agree for reference cells.
scRNA_and_merfish@meta.data$geneformer_ClusterTop[scRNA_and_merfish@meta.data$technology == 'scRNA'] = scRNA_and_merfish@meta.data$clTopLevel[scRNA_and_merfish@meta.data$technology == 'scRNA']

In [None]:
# Show the last few cell names.
scRNA_and_merfish@meta.data %>% rownames %>% tail

In [None]:
options(repr.plot.height = 10, repr.plot.width = 10)
# 'humap' coordinates, one row per cell, with the cell name as a column.
humap_coords <- as.data.frame(Embeddings(scRNA_and_merfish, 'humap'))
humap_coords$cell_id <- rownames(humap_coords)
# Labels and sample information for the same cells.
cell_annotations <- scRNA_and_merfish@meta.data
cell_annotations$cell_id <- rownames(cell_annotations)
cell_annotations <- cell_annotations[, c('cell_id', 'ClusterTop', 'geneformer_ClusterTop', 'technology', 'orig.ident')]
# Natural join on the shared column(s) of the two tables.
embeddings <- full_join(humap_coords, cell_annotations)
head(embeddings)
tail(embeddings)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10, repr.plot.res = 300)
# Embedding coloured by the KNN coarse label, one panel per technology.
# The legend keys are enlarged because '.' points are too small to read.
knn_legend <- guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))
ggplot(embeddings, aes(x = HUMAP_1, y = HUMAP_2, color = ClusterTop)) +
  geom_point(shape = '.', alpha = 0.5) +
  ggthemes::scale_color_colorblind(name = 'KNN') +
  ggpubr::theme_pubr(base_size = 18) +
  facet_wrap(~technology) +
  guides(color = knn_legend)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10, repr.plot.res = 300)
# Embedding coloured by the Geneformer coarse label, one panel per technology.
# The legend keys are enlarged because '.' points are too small to read.
geneformer_legend <- guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))
ggplot(embeddings, aes(x = HUMAP_1, y = HUMAP_2, color = geneformer_ClusterTop)) +
  geom_point(shape = '.', alpha = 0.5) +
  ggthemes::scale_color_colorblind(name = 'Geneformer') +
  ggpubr::theme_pubr(base_size = 18) +
  facet_wrap(~technology) +
  guides(color = geneformer_legend)

In [None]:
# One panel per technology x KNN coarse label (two rows of panels), with
# gghighlight applied so each panel emphasises its own facet's cells.
# The legend is hidden; the facet strips carry the labels.
options(repr.plot.res = 300, repr.plot.height = 15, repr.plot.width = 30)
ggplot(embeddings) +
geom_point(aes(x = HUMAP_1, 
               y = HUMAP_2, 
               color = ClusterTop), 
           shape='.', 
           alpha=0.5) +
ggthemes::scale_color_colorblind() +
ggpubr::theme_pubr(base_size=22) +
facet_wrap(~ technology + ClusterTop, nrow = 2) +
theme(legend.position = 'none') +
gghighlight::gghighlight() +
ggtitle('KNN labels')

In [None]:
# Same layout for the Geneformer coarse labels.
options(repr.plot.res = 300, repr.plot.height = 15, repr.plot.width = 30)
ggplot(embeddings) +
geom_point(aes(x = HUMAP_1, 
               y = HUMAP_2, 
               color = geneformer_ClusterTop), 
           shape='.', 
           alpha=0.5) +
ggthemes::scale_color_colorblind() +
ggpubr::theme_pubr(base_size=22) +
facet_wrap(~ technology + geneformer_ClusterTop, nrow = 2) +
theme(legend.position = 'none') +
gghighlight::gghighlight() +
ggtitle('Geneformer labels')

## DE analysis - coarse types

### scRNA

In [None]:
# Coarse-type markers in the scRNA reference: set identities to ClusterTop,
# run wilcoxauc, and show up to 10 markers per group with AUC >= 0.6 and
# adjusted p <= 0.05.
completeReference = SetIdent(completeReference, value = 'ClusterTop')
scRNA_markers = wilcoxauc(completeReference)
#top_markers(scRNA_markers, auc_min = 0.6, padj_max = 0.05, n = Inf)
top_markers(scRNA_markers, auc_min = 0.6, padj_max = 0.05, n = 10)
#top_markers(scRNA_markers)

### geneformer

In [None]:
# Same analysis on the MERFISH object grouped by the Geneformer coarse labels.
mergedObj = SetIdent(mergedObj, value = 'geneformer_ClusterTop')
geneformer_markers = wilcoxauc(mergedObj)
top_markers(geneformer_markers, auc_min = 0.6, padj_max = 0.05, n = 10)
#top_markers(geneformer_markers)

### knn

In [None]:
# Same analysis on the MERFISH object grouped by the KNN coarse labels.
# Note that this leaves the identities of mergedObj set to ClusterTop.
mergedObj = SetIdent(mergedObj, value = 'ClusterTop')
knnMarkers = wilcoxauc(mergedObj) 
top_markers(knnMarkers, auc_min = 0.6, padj_max = 0.05, n = 10)

### plot correlations between knn and geneformer

In [None]:
options(repr.plot.height = 10, repr.plot.width = 10)
# All pairings of Geneformer groups with KNN groups (displayed for reference).
combnVars <- expand.grid(unique(geneformer_markers$group), unique(knnMarkers$group))
head(combnVars)
# Colour per AUC category; the names double as the factor levels of the
# `color` column in the KNN-vs-Geneformer scatter plots.
cols <- setNames(
  c("#009E73", "red", "blue", "black"),
  c("auc > 0.6 in both",
    "auc > 0.6 only in weightedknn",
    "auc > 0.6 only in geneformer",
    "auc < 0.6 in both")
)

In [None]:
# Per-gene logFC under Geneformer labels (x) versus KNN labels (y), one panel
# per coarse type, coloured by which method gives the gene an AUC above 0.6.
geneformer_stats <- geneformer_markers %>%
  select(feature, group, logFC_geneformer = logFC, auc_geneformer = auc)
knn_stats <- knnMarkers %>%
  select(feature, group, logFC_knn = logFC, auc_knn = auc)
temp <- dplyr::left_join(geneformer_stats, knn_stats, join_by(feature, group))

# The left join leaves NA in the KNN columns for (feature, group) pairs that
# exist only in the Geneformer table. A missing AUC counts as "not above the
# threshold"; a scalar `if` on NA would stop with an error instead.
temp <- temp %>%
  mutate(
    knn_hit = coalesce(auc_knn > 0.6, FALSE),
    geneformer_hit = coalesce(auc_geneformer > 0.6, FALSE),
    labelTRUE = knn_hit | geneformer_hit,
    # Only genes above the threshold in at least one method get a text label.
    label = if_else(labelTRUE, feature, NA_character_),
    color = case_when(
      knn_hit & geneformer_hit ~ 'auc > 0.6 in both',
      knn_hit ~ 'auc > 0.6 only in weightedknn',
      geneformer_hit ~ 'auc > 0.6 only in geneformer',
      TRUE ~ 'auc < 0.6 in both'
    ),
    color = factor(color, levels = names(cols))
  ) %>%
  select(-knn_hit, -geneformer_hit) %>%
  filter(!feature == 'CD74')
head(temp)
options(repr.plot.height = 21, repr.plot.width = 15)
temp %>%
  ggplot(aes(logFC_geneformer, logFC_knn, color = color)) +
  geom_point(shape = '.') +
  theme_bw(base_size = 18) +
  ggrepel::geom_text_repel(max.overlaps = 15, size = 3, aes(label = label), force = 0.5, point.size = NA) +
  scale_color_manual(name = 'AUC', values = cols, drop = FALSE) +
  guides(colour = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
  theme(legend.position = 'left', aspect.ratio = 1) +
  geom_abline() +
  facet_wrap(~group, scales = 'free', ncol = 2)

In [None]:
# Per-gene AUC under Geneformer labels (x) versus KNN labels (y), one panel
# per coarse type, on fixed [0, 1] axes.
geneformer_stats <- geneformer_markers %>%
  select(feature, group, auc_geneformer = auc)
knn_stats <- knnMarkers %>%
  select(feature, group, auc_knn = auc)
temp <- dplyr::left_join(geneformer_stats, knn_stats, join_by(feature, group))

# The left join leaves NA in auc_knn for (feature, group) pairs that exist
# only in the Geneformer table. A missing AUC counts as "not above the
# threshold"; a scalar `if` on NA would stop with an error instead.
temp <- temp %>%
  mutate(
    knn_hit = coalesce(auc_knn > 0.6, FALSE),
    geneformer_hit = coalesce(auc_geneformer > 0.6, FALSE),
    labelTRUE = knn_hit | geneformer_hit,
    # Only genes above the threshold in at least one method get a text label.
    label = if_else(labelTRUE, feature, NA_character_),
    color = case_when(
      knn_hit & geneformer_hit ~ 'auc > 0.6 in both',
      knn_hit ~ 'auc > 0.6 only in weightedknn',
      geneformer_hit ~ 'auc > 0.6 only in geneformer',
      TRUE ~ 'auc < 0.6 in both'
    ),
    color = factor(color, levels = names(cols))
  ) %>%
  select(-knn_hit, -geneformer_hit)
head(temp)
options(repr.plot.height = 21, repr.plot.width = 15)
temp %>%
  ggplot(aes(auc_geneformer, auc_knn, color = color)) +
  geom_point(shape = '.') +
  theme_bw(base_size = 18) +
  ggrepel::geom_text_repel(max.overlaps = 15, size = 3, aes(label = label), force = 0.5, point.size = NA) +
  scale_color_manual(name = 'AUC', values = cols, drop = FALSE) +
  guides(colour = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
  theme(legend.position = 'left', aspect.ratio = 1) +
  geom_abline() +
  facet_wrap(~group, scales = 'fixed', ncol = 2) +
  xlim(0, 1) +
  ylim(0, 1)

### plot correlations between knn and scRNA

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10)
# All pairings of scRNA groups with KNN groups (displayed for reference).
combnVars = expand.grid(unique(scRNA_markers$group), unique(knnMarkers$group))
head(combnVars)
# Colour per AUC category for the KNN-vs-scRNA plots. The names are used as
# the factor levels of the `color` column, so the category strings produced
# by the plotting cells must match these names exactly.
cols = c("auc > 0.6 in both" = "#009E73",  
         "auc > 0.6 only in weightedknn" = "red", 
         "auc > 0.6 only in scRNA" = "blue", 
         "auc < 0.6 in both" = "black")

In [None]:
# Preview the KNN marker table.
head(knnMarkers)

In [None]:
# Per-gene logFC under KNN labels (x) versus the scRNA reference (y), one
# panel per coarse type, coloured by which source gives an AUC above 0.6.
# The marker tables expose `auc`; it is renamed per source here (the previous
# `auc_knn = auc_knn` / `auc_scRNA = auc_scRNA` referred to columns that do
# not exist in the wilcoxauc output).
knn_stats <- knnMarkers %>%
  select(feature, group, logFC_knn = logFC, auc_knn = auc)
scRNA_stats <- scRNA_markers %>%
  select(feature, group, logFC_scRNA = logFC, auc_scRNA = auc)
temp <- dplyr::right_join(knn_stats, scRNA_stats, join_by(feature, group))

# The right join leaves NA in the KNN columns for (feature, group) pairs that
# exist only in the scRNA table. A missing AUC counts as "not above the
# threshold"; a scalar `if` on NA would stop with an error instead.
temp <- temp %>%
  mutate(
    knn_hit = coalesce(auc_knn > 0.6, FALSE),
    scRNA_hit = coalesce(auc_scRNA > 0.6, FALSE),
    labelTRUE = knn_hit | scRNA_hit,
    # Only genes above the threshold in at least one source get a text label.
    label = if_else(labelTRUE, feature, NA_character_),
    # Category strings must equal names(cols); 'only in knn' is not one of
    # them and would turn into NA when converted to a factor.
    color = case_when(
      knn_hit & scRNA_hit ~ 'auc > 0.6 in both',
      knn_hit ~ 'auc > 0.6 only in weightedknn',
      scRNA_hit ~ 'auc > 0.6 only in scRNA',
      TRUE ~ 'auc < 0.6 in both'
    ),
    color = factor(color, levels = names(cols))
  ) %>%
  select(-knn_hit, -scRNA_hit) %>%
  filter(!feature == 'CD74')
head(temp)
options(repr.plot.height = 21, repr.plot.width = 15)
# This cell is the logFC comparison (it carries the logFC columns); the AUC
# comparison is plotted by the following cell.
temp %>%
  ggplot(aes(logFC_knn, logFC_scRNA, color = color)) +
  geom_point(shape = '.') +
  theme_bw(base_size = 18) +
  ggrepel::geom_text_repel(max.overlaps = 15, size = 3, aes(label = label), force = 0.5, point.size = NA) +
  scale_color_manual(name = 'AUC', values = cols, drop = FALSE) +
  guides(colour = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
  theme(legend.position = 'left', aspect.ratio = 1) +
  geom_abline() +
  facet_wrap(~group, scales = 'free', ncol = 2)

In [None]:
# Per-gene AUC under KNN labels (x) versus the scRNA reference (y), one panel
# per coarse type.
knn_stats <- knnMarkers %>%
  select(feature, group, logFC_knn = logFC, auc_knn = auc)
scRNA_stats <- scRNA_markers %>%
  select(feature, group, auc_scRNA = auc)
temp <- dplyr::right_join(knn_stats, scRNA_stats, join_by(feature, group))

# The right join leaves NA in the KNN columns for (feature, group) pairs that
# exist only in the scRNA table. A missing AUC counts as "not above the
# threshold"; a scalar `if` on NA would stop with an error instead.
temp <- temp %>%
  mutate(
    knn_hit = coalesce(auc_knn > 0.6, FALSE),
    scRNA_hit = coalesce(auc_scRNA > 0.6, FALSE),
    labelTRUE = knn_hit | scRNA_hit,
    # Only genes above the threshold in at least one source get a text label.
    label = if_else(labelTRUE, feature, NA_character_),
    # Category strings must equal names(cols); 'only in knn' is not one of
    # them and would turn into NA when converted to a factor.
    color = case_when(
      knn_hit & scRNA_hit ~ 'auc > 0.6 in both',
      knn_hit ~ 'auc > 0.6 only in weightedknn',
      scRNA_hit ~ 'auc > 0.6 only in scRNA',
      TRUE ~ 'auc < 0.6 in both'
    ),
    color = factor(color, levels = names(cols))
  ) %>%
  select(-knn_hit, -scRNA_hit) %>%
  filter(!feature == 'CD74')
head(temp)
options(repr.plot.height = 21, repr.plot.width = 15)
temp %>%
  ggplot(aes(auc_knn, auc_scRNA, color = color)) +
  geom_point(shape = '.') +
  theme_bw(base_size = 18) +
  ggrepel::geom_text_repel(max.overlaps = 15, size = 3, aes(label = label, color = color), force = 0.5, point.size = NA) +
  # The colours encode AUC categories, so the legend is titled accordingly.
  scale_color_manual(name = 'AUC', values = cols, drop = FALSE) +
  guides(colour = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
  theme(legend.position = 'left', aspect.ratio = 1) +
  geom_abline() +
  facet_wrap(~group, scales = 'free', ncol = 2) #+
#xlim(0,1) +
#ylim(0, 1)

### plot correlations between geneformer and scRNA

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10)
# All pairings of scRNA groups with Geneformer groups (displayed for
# reference). This section compares Geneformer with scRNA, so the second
# factor comes from geneformer_markers rather than knnMarkers.
combnVars <- expand.grid(unique(scRNA_markers$group), unique(geneformer_markers$group))
head(combnVars)
# Colour per AUC category; the names double as the factor levels of the
# `color` column in the Geneformer-vs-scRNA scatter plots.
cols <- c("auc > 0.6 in both" = "#009E73",
          "auc > 0.6 only in geneformer" = "red",
          "auc > 0.6 only in scRNA" = "blue",
          "auc < 0.6 in both" = "black")

In [None]:
# Per-gene logFC under Geneformer labels (x) versus the scRNA reference (y),
# one panel per coarse type, coloured by which source gives an AUC above 0.6.
geneformer_stats <- geneformer_markers %>%
  select(feature, group, logFC_geneformer = logFC, auc_geneformer = auc)
scRNA_stats <- scRNA_markers %>%
  select(feature, group, logFC_scRNA = logFC, auc_scRNA = auc)
temp <- dplyr::right_join(geneformer_stats, scRNA_stats, join_by(feature, group))

# The right join leaves NA in the Geneformer columns for (feature, group)
# pairs that exist only in the scRNA table. A missing AUC counts as "not
# above the threshold"; a scalar `if` on NA would stop with an error instead.
temp <- temp %>%
  mutate(
    geneformer_hit = coalesce(auc_geneformer > 0.6, FALSE),
    scRNA_hit = coalesce(auc_scRNA > 0.6, FALSE),
    labelTRUE = geneformer_hit | scRNA_hit,
    # Only genes above the threshold in at least one source get a text label.
    label = if_else(labelTRUE, feature, NA_character_),
    color = case_when(
      geneformer_hit & scRNA_hit ~ 'auc > 0.6 in both',
      geneformer_hit ~ 'auc > 0.6 only in geneformer',
      scRNA_hit ~ 'auc > 0.6 only in scRNA',
      TRUE ~ 'auc < 0.6 in both'
    ),
    color = factor(color, levels = names(cols))
  ) %>%
  select(-geneformer_hit, -scRNA_hit) %>%
  filter(!feature == 'CD74')
head(temp)
options(repr.plot.height = 21, repr.plot.width = 15)
temp %>%
  ggplot(aes(logFC_geneformer, logFC_scRNA, color = color)) +
  geom_point(shape = '.') +
  theme_bw(base_size = 18) +
  ggrepel::geom_text_repel(max.overlaps = 15, size = 3, aes(label = label), force = 0.5, point.size = NA) +
  scale_color_manual(name = 'AUC', values = cols, drop = FALSE) +
  guides(colour = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
  theme(legend.position = 'left', aspect.ratio = 1) +
  geom_abline() +
  facet_wrap(~group, scales = 'free', ncol = 2)

In [None]:
# Per-gene AUC under Geneformer labels (x) versus the scRNA reference (y),
# one panel per coarse type.
geneformer_stats <- geneformer_markers %>%
  select(feature, group, auc_geneformer = auc)
scRNA_stats <- scRNA_markers %>%
  select(feature, group, auc_scRNA = auc)
temp <- dplyr::right_join(geneformer_stats, scRNA_stats, join_by(feature, group))

# The right join leaves NA in auc_geneformer for (feature, group) pairs that
# exist only in the scRNA table. A missing AUC counts as "not above the
# threshold"; a scalar `if` on NA would stop with an error instead.
temp <- temp %>%
  mutate(
    geneformer_hit = coalesce(auc_geneformer > 0.6, FALSE),
    scRNA_hit = coalesce(auc_scRNA > 0.6, FALSE),
    labelTRUE = geneformer_hit | scRNA_hit,
    # Only genes above the threshold in at least one source get a text label.
    label = if_else(labelTRUE, feature, NA_character_),
    color = case_when(
      geneformer_hit & scRNA_hit ~ 'auc > 0.6 in both',
      geneformer_hit ~ 'auc > 0.6 only in geneformer',
      scRNA_hit ~ 'auc > 0.6 only in scRNA',
      TRUE ~ 'auc < 0.6 in both'
    ),
    color = factor(color, levels = names(cols))
  ) %>%
  select(-geneformer_hit, -scRNA_hit) %>%
  filter(!feature == 'CD74')
head(temp)
options(repr.plot.height = 21, repr.plot.width = 15)
temp %>%
  ggplot(aes(auc_geneformer, auc_scRNA, color = color)) +
  geom_point(shape = '.') +
  theme_bw(base_size = 18) +
  ggrepel::geom_text_repel(max.overlaps = 15, size = 3, aes(label = label), force = 0.5, point.size = NA) +
  scale_color_manual(name = 'AUC', values = cols, drop = FALSE) +
  guides(colour = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
  theme(legend.position = 'left', aspect.ratio = 1) +
  geom_abline() +
  facet_wrap(~group, scales = 'free', ncol = 2)

In [None]:
# Per coarse type, correlate scRNA and KNN logFC over genes whose scRNA AUC
# exceeds 0.7. The full join leaves NA where a (feature, group) pair is
# missing from the KNN table; pairwise-complete observations are used so one
# unmatched gene does not turn the whole group's correlation into NA.
full_join(scRNA_markers, knnMarkers, join_by(feature, group), suffix = c('_scRNA', '_knn')) %>%
  filter(auc_scRNA > 0.7) %>%
  group_by(group) %>%
  summarize(cor = cor(logFC_scRNA, logFC_knn, use = 'pairwise.complete.obs'))

In [None]:
# Per coarse type, correlate scRNA and Geneformer logFC over genes whose
# scRNA AUC exceeds 0.7. The full join leaves NA where a (feature, group)
# pair is missing from the Geneformer table; pairwise-complete observations
# are used so one unmatched gene does not turn the whole group's correlation
# into NA.
full_join(scRNA_markers, geneformer_markers, join_by(feature, group), suffix = c('_scRNA', '_geneformer')) %>%
  filter(auc_scRNA > 0.7) %>%
  group_by(group) %>%
  summarize(cor = cor(logFC_scRNA, logFC_geneformer, use = 'pairwise.complete.obs'))

## T cell fine types

### scRNA

In [None]:
# Rename the reference cells to the values of the cellID metadata column.
completeReference = RenameCells(completeReference, new.names = completeReference@meta.data$cellID)

In [None]:
# Cell counts per coarse type in the reference.
completeReference@meta.data$ClusterTop %>% table

In [None]:
completeReference@meta.data %>% colnames

In [None]:
# cellIDs of all reference cells whose coarse type is TNKILC.
tCells = completeReference@meta.data %>% filter(ClusterTop == 'TNKILC') %>% pull(cellID) 
length(tCells)

In [None]:
# Sanity check: how many of those IDs are actual cell names of the object.
intersect(colnames(completeReference), tCells) %>% length

In [None]:
# Reference object restricted to the TNKILC cells.
scRNA_T = subset(completeReference, cells = tCells)
scRNA_T

In [None]:
# Cell counts per cl295v11SubFull label within the subset.
scRNA_T@meta.data$cl295v11SubFull %>% table

In [None]:
# Group the subset by cl295v11SubFull for the marker analysis below and
# preview the metadata (transposed).
scRNA_T = SetIdent(scRNA_T, value = "cl295v11SubFull")
head(scRNA_T) %>% head %>% t

In [None]:
# Fine-type markers within the scRNA TNKILC subset. First display: up to 20
# markers per group with AUC >= 0.6 and adjusted p <= 0.05; second display:
# top_markers() with its default settings. Both are shown transposed.
scRNA_T_markers = wilcoxauc(scRNA_T)
top_markers(scRNA_T_markers, auc_min = 0.6, padj_max = 0.05, n = 20) %>% as.matrix() %>% t()
top_markers(scRNA_T_markers) %>% as.matrix() %>% t()

### geneformer

In [None]:
# MERFISH cells whose Geneformer coarse label is TNKILC, grouped by the
# `geneformer` metadata column.
# NOTE(review): `geneformer` presumably holds the Geneformer fine-type labels -- confirm.
geneformer_T = subset(mergedObj, subset = geneformer_ClusterTop == 'TNKILC')
geneformer_T
geneformer_T = SetIdent(geneformer_T, value = "geneformer")
head(geneformer_T) %>% head %>% t

In [None]:
# Markers per Geneformer group, displayed the same two ways as for scRNA.
geneformer_T_markers = wilcoxauc(geneformer_T)
top_markers(geneformer_T_markers, auc_min = 0.6, padj_max = 0.05, n = 20) %>% as.matrix() %>% t()
top_markers(geneformer_T_markers) %>% as.matrix() %>% t()

### knn

In [None]:
# MERFISH cells whose KNN coarse label is TNKILC.
knn_T = subset(mergedObj, subset = ClusterTop == 'TNKILC')
knn_T

In [None]:
# Distinct fine-type labels present in the KNN subset.
knn_T@meta.data$cleaned_fine_types %>% unique

In [None]:
# Relabel cells annotated 'cTNI22 (cTNI22)' as 'cTNI23 (NK CD16A+)'.
# NOTE(review): the rationale for merging these two labels is not visible here -- confirm.
knn_T@meta.data$cleaned_fine_types[knn_T@meta.data$cleaned_fine_types == 'cTNI22 (cTNI22)'] = 'cTNI23 (NK CD16A+)'

In [None]:
# Group the subset by the cleaned fine-type labels and preview the metadata.
knn_T = SetIdent(knn_T, value = "cleaned_fine_types")
head(knn_T) %>% head %>% t

In [None]:
# Markers per KNN fine type: filtered top 20 first, then top_markers() defaults.
knn_T_markers = wilcoxauc(knn_T)
top_markers(knn_T_markers, auc_min = 0.6, padj_max = 0.05, n = 20)  %>% as.matrix() %>% t()
top_markers(knn_T_markers) %>% as.matrix() %>% t()

In [None]:
options(repr.plot.res = 300, repr.plot.height = 10, repr.plot.width = 15)
# Correlation of per-gene logFC between every scRNA fine type (rows) and
# every KNN fine type (columns) within the TNKILC compartment.
scRNA_groups <- unique(scRNA_T_markers$group)
knn_groups <- unique(knn_T_markers$group)
corMat <- matrix(data = NA, nrow = length(scRNA_groups), ncol = length(knn_groups))
rownames(corMat) <- scRNA_groups
colnames(corMat) <- knn_groups

for (scRNA_label in scRNA_groups) {
  scRNA_rows <- filter(scRNA_T_markers, group == scRNA_label)
  for (knn_label in knn_groups) {
    knn_rows <- filter(knn_T_markers, group == knn_label)
    # Pair the two marker tables by gene and keep only fully observed rows.
    paired <- na.omit(full_join(scRNA_rows, knn_rows, join_by(feature), suffix = c('_scRNA', '_knn')))
    corMat[scRNA_label, knn_label] <- pull(summarize(paired, cor = cor(logFC_scRNA, logFC_knn)), cor)
  }
}
# Undefined correlations are shown as zero.
corMat[is.na(corMat)] <- 0
# Wrap long labels so they fit next to the heatmap.
colnames(corMat) <- str_wrap(colnames(corMat), width = 25)
rownames(corMat) <- str_wrap(rownames(corMat), width = 25)

In [None]:
# Correlation of per-gene logFC between every scRNA fine type (rows) and
# every Geneformer group (columns) within the TNKILC compartment.
scRNA_groups <- unique(scRNA_T_markers$group)
geneformer_groups <- unique(geneformer_T_markers$group)
corMat2 <- matrix(data = NA, nrow = length(scRNA_groups), ncol = length(geneformer_groups))
rownames(corMat2) <- scRNA_groups
colnames(corMat2) <- geneformer_groups

for (scRNA_label in scRNA_groups) {
  scRNA_rows <- filter(scRNA_T_markers, group == scRNA_label)
  for (geneformer_label in geneformer_groups) {
    geneformer_rows <- filter(geneformer_T_markers, group == geneformer_label)
    # Pair the two marker tables by gene and keep only fully observed rows.
    paired <- na.omit(full_join(scRNA_rows, geneformer_rows, join_by(feature), suffix = c('_scRNA', '_geneformer')))
    corMat2[scRNA_label, geneformer_label] <- pull(summarize(paired, cor = cor(logFC_scRNA, logFC_geneformer)), cor)
  }
}
# Undefined correlations are shown as zero.
corMat2[is.na(corMat2)] <- 0
# Wrap long labels so they fit next to the heatmap.
colnames(corMat2) <- str_wrap(colnames(corMat2), width = 25)
rownames(corMat2) <- str_wrap(rownames(corMat2), width = 25)

In [None]:
# Global ComplexHeatmap options: borders around legends, heatmaps and annotations.
ht_opt(legend_border = "black", heatmap_border = TRUE, annotation_border = TRUE)

# Graphic parameters shared by both panels.
title_gp <- gpar(col = 'red', fontsize = 32)
cell_gp <- gpar(col = "white", lwd = 2)
panel_height <- unit(10, 'in')

# KNN and Geneformer correlation matrices side by side; rows are the scRNA
# fine types in both.
h1 <- Heatmap(corMat, name = 'Corr', column_title = "KNN",
              column_title_gp = title_gp, rect_gp = cell_gp, height = panel_height)
h2 <- Heatmap(corMat2, name = 'Corr', column_title = "Geneformer",
              column_title_gp = title_gp, rect_gp = cell_gp, height = panel_height)
ht_list <- h1 + h2
options(repr.plot.res = 300, repr.plot.height = 16, repr.plot.width = 15)
draw(ht_list,
     row_title = "scRNA atlas cell types",
     row_title_gp = gpar(col = "red", fontsize = 32),
     column_title = "TNKILCs",
     column_title_gp = gpar(col = 'red', fontsize = 32))

## B fine types

In [None]:
# Correlation of per-gene logFC between every group of `row_markers` (rows)
# and every group of `col_markers` (columns). Both arguments are wilcoxauc
# result tables. Undefined correlations are set to 0 and the dimnames are
# wrapped to 25 characters for plotting.
.logfcCorrelationMatrix = function(row_markers, col_markers){
    row_groups = unique(row_markers$group)
    col_groups = unique(col_markers$group)
    mat = matrix(data = NA, nrow = length(row_groups), ncol = length(col_groups))
    rownames(mat) = row_groups
    colnames(mat) = col_groups

    for (row_group in row_groups){
        row_stats = row_markers %>% filter(group == row_group)
        for (col_group in col_groups){
            # Pair the two tables by gene and keep only fully observed rows.
            mat[row_group, col_group] = full_join(row_stats,
                                                  col_markers %>% filter(group == col_group),
                                                  join_by(feature), suffix = c('_row', '_col')) %>%
                na.omit() %>%
                summarize(r = cor(logFC_row, logFC_col)) %>%
                pull(r)
        }
    }
    mat[is.na(mat)] = 0
    colnames(mat) = str_wrap(colnames(mat), width = 25)
    rownames(mat) = str_wrap(rownames(mat), width = 25)
    return(mat)
}

# Draw, for one coarse cell type, two side-by-side heatmaps of the logFC
# correlation between scRNA fine types (rows) and the MERFISH fine types
# obtained with KNN labels (left) and Geneformer labels (right).
#
# type: coarse type to analyse; matched against ClusterTop in the global
#       `completeReference` and against ClusterTop / geneformer_ClusterTop in
#       the global `mergedObj`.
# Returns the HeatmapList invisibly. The plot is produced by draw(); a visible
# return value would be auto-printed by the notebook and drawn a second time
# without the titles.
drawCorrelationHeatmap = function(type){
    # Cells are selected by name with which(), so a missing label drops the
    # cell instead of producing an NA index, and the selection uses this
    # function's `type` argument.
    # NOTE(review): the previous subset(mergedObj, subset = ClusterTop == type)
    # appears to have depended on a global `type` being set before the call -- confirm.
    ref_meta = completeReference@meta.data
    scRNA_T = completeReference[, rownames(ref_meta)[which(ref_meta$ClusterTop == type)]]
    scRNA_T = SetIdent(scRNA_T, value = "cl295v11SubFull")
    scRNA_T_markers = wilcoxauc(scRNA_T)

    merfish_meta = mergedObj@meta.data
    geneformer_T = mergedObj[, rownames(merfish_meta)[which(merfish_meta$geneformer_ClusterTop == type)]]
    geneformer_T = SetIdent(geneformer_T, value = "geneformer")
    geneformer_T_markers = wilcoxauc(geneformer_T)

    knn_T = mergedObj[, rownames(merfish_meta)[which(merfish_meta$ClusterTop == type)]]
    knn_T = SetIdent(knn_T, value = "cleaned_fine_types")
    knn_T_markers = wilcoxauc(knn_T)

    corMat = .logfcCorrelationMatrix(scRNA_T_markers, knn_T_markers)
    corMat2 = .logfcCorrelationMatrix(scRNA_T_markers, geneformer_T_markers)

    h1 = Heatmap(corMat, column_title = "KNN", height = unit(10, 'in'), name = 'Corr', column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
    h2 = Heatmap(corMat2, column_title = "Geneformer", height = unit(10, 'in'), name = 'Corr',  column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
    ht_list = h1 + h2
    # Notebook plot size for the combined figure.
    options(repr.plot.width = 15, repr.plot.height = 16, repr.plot.res = 300)
    draw(ht_list, row_title = "scRNA atlas cell types", row_title_gp = gpar(col = "red", fontsize = 32), column_title = type, column_title_gp = gpar(col = 'red', fontsize = 32))
    invisible(ht_list)
}

In [None]:
# Global ComplexHeatmap options: borders around legends, heatmaps and annotations.
ht_opt(
    legend_border = "black",
    heatmap_border = TRUE,
    annotation_border = TRUE
)

# Coarse type analysed by the following cells.
type = 'B'

# The generic helper is not used for B cells; the cells below run the
# comparison step by step, for the KNN labels only.
#drawCorrelationHeatmap('B')

In [None]:
# Reference cells of the selected coarse type, picked by their cellID.
cells.use = completeReference@meta.data$cellID[completeReference@meta.data$ClusterTop == type]
scRNA_T = completeReference[, cells.use]
colnames(scRNA_T@meta.data)

In [None]:
scRNA_T

In [None]:
# Fine-type markers for the selected coarse type in the scRNA reference,
# grouped by the cleaned fine-type labels.
scRNA_T <- SetIdent(scRNA_T, value = "cleaned_fine_types")
scRNA_T_markers <- wilcoxauc(scRNA_T)
top_markers(scRNA_T_markers)

# MERFISH cells with the same KNN coarse label, grouped by cleaned fine type.
# The subset is taken once; the earlier row-name based subset was immediately
# overwritten and would have produced NA cell names for unlabelled cells.
knn_T <- subset(mergedObj, subset = ClusterTop == type)
knn_T <- SetIdent(knn_T, value = "cleaned_fine_types")
knn_T_markers <- wilcoxauc(knn_T)

options(repr.plot.width = 15, repr.plot.height = 10, repr.plot.res = 300)
# Rows: scRNA fine types; columns: KNN fine types. Initialised once.
corMat <- matrix(data = NA, nrow = length(unique(scRNA_T_markers$group)), ncol = length(unique(knn_T_markers$group)))
rownames(corMat) <- unique(scRNA_T_markers$group)
colnames(corMat) <- unique(knn_T_markers$group)

for (group_scRNA in unique(scRNA_T_markers$group)) {
  for (group_knn in unique(knn_T_markers$group)) {
    # Pair the two marker tables by gene, keep fully observed rows, and
    # correlate the logFC values.
    corMat[group_scRNA, group_knn] <- full_join(
      scRNA_T_markers %>% filter(group == group_scRNA),
      knn_T_markers %>% filter(group == group_knn),
      join_by(feature), suffix = c('_scRNA', '_knn')
    ) %>%
      na.omit() %>%
      summarize(r = cor(logFC_scRNA, logFC_knn)) %>%
      pull(r)
  }
}

In [None]:
options(repr.plot.res = 300, repr.plot.height = 6, repr.plot.width = 8)

# Undefined correlations are shown as zero; long labels are wrapped.
corMat[is.na(corMat)] <- 0
colnames(corMat) <- str_wrap(colnames(corMat), width = 25)
rownames(corMat) <- str_wrap(rownames(corMat), width = 25)

# Single heatmap of scRNA fine types (rows) against KNN fine types (columns).
title_gp <- gpar(col = 'red', fontsize = 20)
h1 <- Heatmap(corMat,
              name = 'Corr',
              column_title = "KNN",
              column_title_gp = title_gp,
              rect_gp = gpar(col = "white", lwd = 2),
              height = unit(2, 'in'))
draw(h1,
     row_title = "scRNA atlas cell types",
     row_title_gp = gpar(col = "red", fontsize = 20),
     column_title = type,
     column_title_gp = gpar(col = 'red', fontsize = 20))


## Myeloid

In [None]:
# Correlation heatmaps for the myeloid compartment. The global `type` is set
# to the same value that is passed to the function.
type = 'Myeloid'
drawCorrelationHeatmap('Myeloid')

## Strom

In [None]:
# Correlation heatmaps for the stromal compartment.
type = 'Strom'
drawCorrelationHeatmap('Strom')

## Epi

In [None]:
# Correlation heatmaps for the epithelial compartment.
type = 'Epi'
drawCorrelationHeatmap('Epi')

## GLMM heatmaps

### B cells

In [None]:
# Cell type handled by this section; the input paths below are built from it.
type = 'B'

In [None]:
# Build the path from `type` so the loaded table always matches the section's cell type.
knn = read_csv(glue('/home/mup728/backup_notebooks/', type, '_knn_GLMM.csv'))
head(knn)

In [None]:
# NOTE(review): this section reads the "cleaned_fine_types" scRNA results, whereas the
# Strom/TNKILC/Epi sections read "original_labels" files -- confirm this is intentional.
effects_marginal_scRNA = read_csv(glue('/home/mup728/backup_notebooks/', type, '_scRNA_cleaned_fine_types_GLMM.csv'))
head(effects_marginal_scRNA)

In [None]:
# Correlation of logFC between every scRNA contrast (rows) and every knn contrast (columns).
scRNA_contrasts = unique(effects_marginal_scRNA$contrast)
knn_contrasts = unique(knn$contrast)

corMat = matrix(data = NA, nrow = length(scRNA_contrasts), ncol = length(knn_contrasts))
rownames(corMat) = scRNA_contrasts
colnames(corMat) = knn_contrasts

for (contrast_scRNA in scRNA_contrasts){
    # Subset the scRNA table once per row instead of once per matrix cell.
    scRNA_subset = effects_marginal_scRNA %>%
        filter(contrast == contrast_scRNA)
    for (contrast_knn in knn_contrasts){
        # inner_join keeps exactly the rows that full_join followed by na.omit kept:
        # features present in both tables. na.omit still drops rows with an NA in any column.
        # The result is not stored in a variable called `cor`, which shadowed stats::cor.
        pair_cor = inner_join(scRNA_subset,
                              knn %>%
                              filter(contrast == contrast_knn),
                              join_by(feature), suffix = c('_scRNA', '_knn')) %>%
        #filter(auc_scRNA > 0.7) %>%
        na.omit() %>%
        summarize(r = cor(logFC_scRNA, logFC_knn)) %>%
        pull(r)
        corMat[contrast_scRNA, contrast_knn] = pair_cor
    }
}
# NA correlations are displayed as 0; labels are wrapped at 25 characters for plotting.
corMat[is.na(corMat)] = 0
colnames(corMat) = str_wrap(colnames(corMat), width = 25)
rownames(corMat) = str_wrap(rownames(corMat), width = 25)
corMat

In [None]:
# Heatmap of corMat (scRNA contrasts in rows, knn contrasts in columns).
# NOTE(review): the body is 10 in tall and this cell does not set repr.plot.* itself,
# so the figure size comes from whichever cell last changed those options -- confirm it fits.
h1 = Heatmap(
    corMat,
    name = 'Corr',
    column_title = "KNN",
    column_title_gp = gpar(col = 'red', fontsize = 32),
    rect_gp = gpar(col = "white", lwd = 2),
    height = unit(10, 'in')
)
h1

### Myeloid

In [None]:
# Cell type handled by this section; the input paths below are built from it.
type = 'Myeloid'

In [None]:
# Build the path from `type` so the loaded table always matches the section's cell type.
geneformer = read_csv(glue('/home/mup728/backup_notebooks/', type, '_geneformer_GLMM.csv'))
head(geneformer)

In [None]:
knn = read_csv(glue('/home/mup728/backup_notebooks/', type, '_knn_GLMM.csv'))
head(knn)

In [None]:
# NOTE(review): this section reads the "cleaned_fine_types" scRNA results, whereas the
# Strom/TNKILC/Epi sections read "original_labels" files -- confirm this is intentional.
effects_marginal_scRNA = read_csv(glue('/home/mup728/backup_notebooks/', type, '_scRNA_cleaned_fine_types_GLMM.csv'))
head(effects_marginal_scRNA)

#### compare knn and scrna

In [None]:
# Correlation of logFC between every scRNA contrast (rows) and every knn contrast (columns).
scRNA_contrasts = unique(effects_marginal_scRNA$contrast)
knn_contrasts = unique(knn$contrast)

corMat = matrix(data = NA, nrow = length(scRNA_contrasts), ncol = length(knn_contrasts))
rownames(corMat) = scRNA_contrasts
colnames(corMat) = knn_contrasts

for (contrast_scRNA in scRNA_contrasts){
    # Subset the scRNA table once per row instead of once per matrix cell.
    scRNA_subset = effects_marginal_scRNA %>%
        filter(contrast == contrast_scRNA)
    for (contrast_knn in knn_contrasts){
        # inner_join keeps exactly the rows that full_join followed by na.omit kept:
        # features present in both tables. na.omit still drops rows with an NA in any column.
        # The result is not stored in a variable called `cor`, which shadowed stats::cor.
        pair_cor = inner_join(scRNA_subset,
                              knn %>%
                              filter(contrast == contrast_knn),
                              join_by(feature), suffix = c('_scRNA', '_knn')) %>%
        #filter(auc_scRNA > 0.7) %>%
        na.omit() %>%
        summarize(r = cor(logFC_scRNA, logFC_knn)) %>%
        pull(r)
        corMat[contrast_scRNA, contrast_knn] = pair_cor
    }
}
# NA correlations are displayed as 0; labels are wrapped at 25 characters for plotting.
corMat[is.na(corMat)] = 0
colnames(corMat) = str_wrap(colnames(corMat), width = 25)
rownames(corMat) = str_wrap(rownames(corMat), width = 25)
corMat

#### compare geneformer and scrna

In [None]:
# Correlation of logFC between every scRNA contrast (rows) and every geneformer contrast (columns).
scRNA_contrasts = unique(effects_marginal_scRNA$contrast)
geneformer_contrasts = unique(geneformer$contrast)

corMat2 = matrix(data = NA, nrow = length(scRNA_contrasts), ncol = length(geneformer_contrasts))
rownames(corMat2) = scRNA_contrasts
colnames(corMat2) = geneformer_contrasts

for (contrast_scRNA in scRNA_contrasts){
    # Subset the scRNA table once per row instead of once per matrix cell.
    scRNA_subset = effects_marginal_scRNA %>%
        filter(contrast == contrast_scRNA)
    for (contrast_geneformer in geneformer_contrasts){
        # inner_join keeps exactly the rows that full_join followed by na.omit kept:
        # features present in both tables. na.omit still drops rows with an NA in any column.
        # The result is not stored in a variable called `cor`, which shadowed stats::cor.
        pair_cor = inner_join(scRNA_subset,
                              geneformer %>%
                              filter(contrast == contrast_geneformer),
                              join_by(feature), suffix = c('_scRNA', '_geneformer')) %>%
        #filter(auc_scRNA > 0.7) %>%
        na.omit() %>%
        summarize(r = cor(logFC_scRNA, logFC_geneformer)) %>%
        pull(r)
        corMat2[contrast_scRNA, contrast_geneformer] = pair_cor
    }
}
# NA correlations are displayed as 0; labels are wrapped at 25 characters for plotting.
corMat2[is.na(corMat2)] = 0
colnames(corMat2) = str_wrap(colnames(corMat2), width = 25)
rownames(corMat2) = str_wrap(rownames(corMat2), width = 25)
corMat2

#### draw heatmaps

In [None]:
# One explicit colour scale over the full correlation range, shared by both panels.
# Without `col`, each Heatmap() derives its mapping from its own matrix, so the same
# colour could stand for different correlations in the KNN and Geneformer panels.
corr_col = colorRamp2(c(-1, 0, 1), c('blue', '#EEEEEE', 'red'))
h1 = Heatmap(corMat, col = corr_col, column_title = "KNN", height = unit(10, 'in'), name = 'Corr', column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
# Heatmap names should be unique within a heatmap list. The second legend is hidden
# because, with the shared scale, it would only repeat the first one.
h2 = Heatmap(corMat2, col = corr_col, column_title = "Geneformer", height = unit(10, 'in'), name = 'Corr_geneformer', show_heatmap_legend = FALSE, column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
ht_list = h1 + h2
options(repr.plot.width = 15, repr.plot.height = 16, repr.plot.res = 300)
draw(ht_list, row_title = "scRNA atlas cell types", row_title_gp = gpar(col = "red", fontsize = 32), column_title = glue(type, ' - glmm'), column_title_gp = gpar(col = 'red', fontsize = 32))

### Strom

In [None]:
# Cell type handled by this section; all three input paths below are built from it.
type = 'Strom'

In [None]:
geneformer = read_csv(glue('/home/mup728/backup_notebooks/', type, '_geneformer_GLMM.csv'))
head(geneformer)

In [None]:
knn = read_csv(glue('/home/mup728/backup_notebooks/', type, '_knn_GLMM.csv'))
head(knn)

In [None]:
# The scRNA path is derived from `type` like the other two inputs, so changing `type`
# cannot leave a stale reference table loaded for a different cell type.
effects_marginal_scRNA = read_csv(glue('/home/mup728/backup_notebooks/', type, '_scRNA_original_labels_GLMM.csv'))
head(effects_marginal_scRNA)

#### compare knn and scrna

In [None]:
# Correlation of logFC between every scRNA contrast (rows) and every knn contrast (columns).
scRNA_contrasts = unique(effects_marginal_scRNA$contrast)
knn_contrasts = unique(knn$contrast)

corMat = matrix(data = NA, nrow = length(scRNA_contrasts), ncol = length(knn_contrasts))
rownames(corMat) = scRNA_contrasts
colnames(corMat) = knn_contrasts

for (contrast_scRNA in scRNA_contrasts){
    # Subset the scRNA table once per row instead of once per matrix cell.
    scRNA_subset = effects_marginal_scRNA %>%
        filter(contrast == contrast_scRNA)
    for (contrast_knn in knn_contrasts){
        # inner_join keeps exactly the rows that full_join followed by na.omit kept:
        # features present in both tables. na.omit still drops rows with an NA in any column.
        # The result is not stored in a variable called `cor`, which shadowed stats::cor.
        pair_cor = inner_join(scRNA_subset,
                              knn %>%
                              filter(contrast == contrast_knn),
                              join_by(feature), suffix = c('_scRNA', '_knn')) %>%
        #filter(auc_scRNA > 0.7) %>%
        na.omit() %>%
        summarize(r = cor(logFC_scRNA, logFC_knn)) %>%
        pull(r)
        corMat[contrast_scRNA, contrast_knn] = pair_cor
    }
}
# NA correlations are displayed as 0; labels are wrapped at 25 characters for plotting.
corMat[is.na(corMat)] = 0
colnames(corMat) = str_wrap(colnames(corMat), width = 25)
rownames(corMat) = str_wrap(rownames(corMat), width = 25)
corMat

#### compare geneformer and scrna

In [None]:
# Correlation of logFC between every scRNA contrast (rows) and every geneformer contrast (columns).
scRNA_contrasts = unique(effects_marginal_scRNA$contrast)
geneformer_contrasts = unique(geneformer$contrast)

corMat2 = matrix(data = NA, nrow = length(scRNA_contrasts), ncol = length(geneformer_contrasts))
rownames(corMat2) = scRNA_contrasts
colnames(corMat2) = geneformer_contrasts

for (contrast_scRNA in scRNA_contrasts){
    # Subset the scRNA table once per row instead of once per matrix cell.
    scRNA_subset = effects_marginal_scRNA %>%
        filter(contrast == contrast_scRNA)
    for (contrast_geneformer in geneformer_contrasts){
        # inner_join keeps exactly the rows that full_join followed by na.omit kept:
        # features present in both tables. na.omit still drops rows with an NA in any column.
        # The result is not stored in a variable called `cor`, which shadowed stats::cor.
        pair_cor = inner_join(scRNA_subset,
                              geneformer %>%
                              filter(contrast == contrast_geneformer),
                              join_by(feature), suffix = c('_scRNA', '_geneformer')) %>%
        #filter(auc_scRNA > 0.7) %>%
        na.omit() %>%
        summarize(r = cor(logFC_scRNA, logFC_geneformer)) %>%
        pull(r)
        corMat2[contrast_scRNA, contrast_geneformer] = pair_cor
    }
}
# NA correlations are displayed as 0; labels are wrapped at 25 characters for plotting.
corMat2[is.na(corMat2)] = 0
colnames(corMat2) = str_wrap(colnames(corMat2), width = 25)
rownames(corMat2) = str_wrap(rownames(corMat2), width = 25)
corMat2

#### draw heatmaps

In [None]:
# One explicit colour scale over the full correlation range, shared by both panels.
# Without `col`, each Heatmap() derives its mapping from its own matrix, so the same
# colour could stand for different correlations in the KNN and Geneformer panels.
corr_col = colorRamp2(c(-1, 0, 1), c('blue', '#EEEEEE', 'red'))
h1 = Heatmap(corMat, col = corr_col, column_title = "KNN", height = unit(10, 'in'), name = 'Corr', column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
# Heatmap names should be unique within a heatmap list. The second legend is hidden
# because, with the shared scale, it would only repeat the first one.
h2 = Heatmap(corMat2, col = corr_col, column_title = "Geneformer", height = unit(10, 'in'), name = 'Corr_geneformer', show_heatmap_legend = FALSE, column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
ht_list = h1 + h2
options(repr.plot.width = 15, repr.plot.height = 16, repr.plot.res = 300)
draw(ht_list, row_title = "scRNA atlas cell types", row_title_gp = gpar(col = "red", fontsize = 32), column_title = glue(type, ' - glmm'), column_title_gp = gpar(col = 'red', fontsize = 32))

### TNKILC

In [None]:
# Cell type handled by this section; all three input paths below are built from it.
type = 'TNKILC'

In [None]:
geneformer = read_csv(glue('/home/mup728/backup_notebooks/', type, '_geneformer_GLMM.csv'))
head(geneformer)

In [None]:
knn = read_csv(glue('/home/mup728/backup_notebooks/', type, '_knn_GLMM.csv'))
head(knn)

In [None]:
# The scRNA path is derived from `type` like the other two inputs, so changing `type`
# cannot leave a stale reference table loaded for a different cell type.
effects_marginal_scRNA = read_csv(glue('/home/mup728/backup_notebooks/', type, '_scRNA_original_labels_GLMM.csv'))
head(effects_marginal_scRNA)

#### compare knn and scrna

In [None]:
# Correlation of logFC between every scRNA contrast (rows) and every knn contrast (columns).
scRNA_contrasts = unique(effects_marginal_scRNA$contrast)
knn_contrasts = unique(knn$contrast)

corMat = matrix(data = NA, nrow = length(scRNA_contrasts), ncol = length(knn_contrasts))
rownames(corMat) = scRNA_contrasts
colnames(corMat) = knn_contrasts

for (contrast_scRNA in scRNA_contrasts){
    # Subset the scRNA table once per row instead of once per matrix cell.
    scRNA_subset = effects_marginal_scRNA %>%
        filter(contrast == contrast_scRNA)
    for (contrast_knn in knn_contrasts){
        # inner_join keeps exactly the rows that full_join followed by na.omit kept:
        # features present in both tables. na.omit still drops rows with an NA in any column.
        # The result is not stored in a variable called `cor`, which shadowed stats::cor.
        pair_cor = inner_join(scRNA_subset,
                              knn %>%
                              filter(contrast == contrast_knn),
                              join_by(feature), suffix = c('_scRNA', '_knn')) %>%
        #filter(auc_scRNA > 0.7) %>%
        na.omit() %>%
        summarize(r = cor(logFC_scRNA, logFC_knn)) %>%
        pull(r)
        corMat[contrast_scRNA, contrast_knn] = pair_cor
    }
}
# NA correlations are displayed as 0; labels are wrapped at 25 characters for plotting.
corMat[is.na(corMat)] = 0
colnames(corMat) = str_wrap(colnames(corMat), width = 25)
rownames(corMat) = str_wrap(rownames(corMat), width = 25)
corMat

#### compare geneformer and scrna

In [None]:
# Correlation of logFC between every scRNA contrast (rows) and every geneformer contrast (columns).
scRNA_contrasts = unique(effects_marginal_scRNA$contrast)
geneformer_contrasts = unique(geneformer$contrast)

corMat2 = matrix(data = NA, nrow = length(scRNA_contrasts), ncol = length(geneformer_contrasts))
rownames(corMat2) = scRNA_contrasts
colnames(corMat2) = geneformer_contrasts

for (contrast_scRNA in scRNA_contrasts){
    # Subset the scRNA table once per row instead of once per matrix cell.
    scRNA_subset = effects_marginal_scRNA %>%
        filter(contrast == contrast_scRNA)
    for (contrast_geneformer in geneformer_contrasts){
        # inner_join keeps exactly the rows that full_join followed by na.omit kept:
        # features present in both tables. na.omit still drops rows with an NA in any column.
        # The result is not stored in a variable called `cor`, which shadowed stats::cor.
        pair_cor = inner_join(scRNA_subset,
                              geneformer %>%
                              filter(contrast == contrast_geneformer),
                              join_by(feature), suffix = c('_scRNA', '_geneformer')) %>%
        #filter(auc_scRNA > 0.7) %>%
        na.omit() %>%
        summarize(r = cor(logFC_scRNA, logFC_geneformer)) %>%
        pull(r)
        corMat2[contrast_scRNA, contrast_geneformer] = pair_cor
    }
}
# NA correlations are displayed as 0; labels are wrapped at 25 characters for plotting.
corMat2[is.na(corMat2)] = 0
colnames(corMat2) = str_wrap(colnames(corMat2), width = 25)
rownames(corMat2) = str_wrap(rownames(corMat2), width = 25)
corMat2

#### draw heatmaps

In [None]:
# One explicit colour scale over the full correlation range, shared by both panels.
# Without `col`, each Heatmap() derives its mapping from its own matrix, so the same
# colour could stand for different correlations in the KNN and Geneformer panels.
corr_col = colorRamp2(c(-1, 0, 1), c('blue', '#EEEEEE', 'red'))
h1 = Heatmap(corMat, col = corr_col, column_title = "KNN", height = unit(10, 'in'), name = 'Corr', column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
# Heatmap names should be unique within a heatmap list. The second legend is hidden
# because, with the shared scale, it would only repeat the first one.
h2 = Heatmap(corMat2, col = corr_col, column_title = "Geneformer", height = unit(10, 'in'), name = 'Corr_geneformer', show_heatmap_legend = FALSE, column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
ht_list = h1 + h2
options(repr.plot.width = 15, repr.plot.height = 16, repr.plot.res = 300)
draw(ht_list, row_title = "scRNA atlas cell types", row_title_gp = gpar(col = "red", fontsize = 32), column_title = glue(type, ' - glmm'), column_title_gp = gpar(col = 'red', fontsize = 32))

### Epi

In [None]:
# Cell type handled by this section; all three input paths below are built from it.
type = 'Epi'

In [None]:
geneformer = read_csv(glue('/home/mup728/backup_notebooks/', type, '_geneformer_GLMM.csv'))
head(geneformer)

In [None]:
knn = read_csv(glue('/home/mup728/backup_notebooks/', type, '_knn_GLMM.csv'))
head(knn)

In [None]:
# The scRNA path is derived from `type` like the other two inputs, so changing `type`
# cannot leave a stale reference table loaded for a different cell type.
effects_marginal_scRNA = read_csv(glue('/home/mup728/backup_notebooks/', type, '_scRNA_original_labels_GLMM.csv'))
head(effects_marginal_scRNA)

#### compare knn and scrna

In [None]:
# Correlation of logFC between every scRNA contrast (rows) and every knn contrast (columns).
scRNA_contrasts = unique(effects_marginal_scRNA$contrast)
knn_contrasts = unique(knn$contrast)

corMat = matrix(data = NA, nrow = length(scRNA_contrasts), ncol = length(knn_contrasts))
rownames(corMat) = scRNA_contrasts
colnames(corMat) = knn_contrasts

for (contrast_scRNA in scRNA_contrasts){
    # Subset the scRNA table once per row instead of once per matrix cell.
    scRNA_subset = effects_marginal_scRNA %>%
        filter(contrast == contrast_scRNA)
    for (contrast_knn in knn_contrasts){
        # inner_join keeps exactly the rows that full_join followed by na.omit kept:
        # features present in both tables. na.omit still drops rows with an NA in any column.
        # The result is not stored in a variable called `cor`, which shadowed stats::cor.
        pair_cor = inner_join(scRNA_subset,
                              knn %>%
                              filter(contrast == contrast_knn),
                              join_by(feature), suffix = c('_scRNA', '_knn')) %>%
        #filter(auc_scRNA > 0.7) %>%
        na.omit() %>%
        summarize(r = cor(logFC_scRNA, logFC_knn)) %>%
        pull(r)
        corMat[contrast_scRNA, contrast_knn] = pair_cor
    }
}
# NA correlations are displayed as 0; labels are wrapped at 25 characters for plotting.
corMat[is.na(corMat)] = 0
colnames(corMat) = str_wrap(colnames(corMat), width = 25)
rownames(corMat) = str_wrap(rownames(corMat), width = 25)
corMat

#### compare geneformer and scrna

In [None]:
# Correlation of logFC between every scRNA contrast (rows) and every geneformer contrast (columns).
scRNA_contrasts = unique(effects_marginal_scRNA$contrast)
geneformer_contrasts = unique(geneformer$contrast)

corMat2 = matrix(data = NA, nrow = length(scRNA_contrasts), ncol = length(geneformer_contrasts))
rownames(corMat2) = scRNA_contrasts
colnames(corMat2) = geneformer_contrasts

for (contrast_scRNA in scRNA_contrasts){
    # Subset the scRNA table once per row instead of once per matrix cell.
    scRNA_subset = effects_marginal_scRNA %>%
        filter(contrast == contrast_scRNA)
    for (contrast_geneformer in geneformer_contrasts){
        # inner_join keeps exactly the rows that full_join followed by na.omit kept:
        # features present in both tables. na.omit still drops rows with an NA in any column.
        # The result is not stored in a variable called `cor`, which shadowed stats::cor.
        pair_cor = inner_join(scRNA_subset,
                              geneformer %>%
                              filter(contrast == contrast_geneformer),
                              join_by(feature), suffix = c('_scRNA', '_geneformer')) %>%
        #filter(auc_scRNA > 0.7) %>%
        na.omit() %>%
        summarize(r = cor(logFC_scRNA, logFC_geneformer)) %>%
        pull(r)
        corMat2[contrast_scRNA, contrast_geneformer] = pair_cor
    }
}
# NA correlations are displayed as 0; labels are wrapped at 25 characters for plotting.
corMat2[is.na(corMat2)] = 0
colnames(corMat2) = str_wrap(colnames(corMat2), width = 25)
rownames(corMat2) = str_wrap(rownames(corMat2), width = 25)
corMat2

#### draw heatmaps

In [None]:
# One explicit colour scale over the full correlation range, shared by both panels.
# Without `col`, each Heatmap() derives its mapping from its own matrix, so the same
# colour could stand for different correlations in the KNN and Geneformer panels.
corr_col = colorRamp2(c(-1, 0, 1), c('blue', '#EEEEEE', 'red'))
h1 = Heatmap(corMat, col = corr_col, column_title = "KNN", height = unit(10, 'in'), name = 'Corr', column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
# Heatmap names should be unique within a heatmap list. The second legend is hidden
# because, with the shared scale, it would only repeat the first one.
h2 = Heatmap(corMat2, col = corr_col, column_title = "Geneformer", height = unit(10, 'in'), name = 'Corr_geneformer', show_heatmap_legend = FALSE, column_title_gp = gpar(col = 'red', fontsize = 32), rect_gp = gpar(col = "white", lwd = 2))
ht_list = h1 + h2
options(repr.plot.width = 15, repr.plot.height = 16, repr.plot.res = 300)
draw(ht_list, row_title = "scRNA atlas cell types", row_title_gp = gpar(col = "red", fontsize = 32), column_title = glue(type, ' - glmm'), column_title_gp = gpar(col = 'red', fontsize = 32))