In [None]:
require(Seurat)
require(tidyverse)
require(readxl)
require(patchwork)
require(sf)
require(ggpubr)
require(ggthemes)
require(harmony)
require(presto)
require(ComplexHeatmap)
require(circlize)
require(glue)
require(e1071) 
require(caTools) 
require(class) 
require(gghighlight)
set.seed(1)

In [None]:
# Load the object stored in mergedCoarseType.rds; the cells below treat it as a
# Seurat object (SetIdent, subset, @meta.data).
mergedCoarseType <- readr::read_rds(file = "mergedCoarseType.rds")

# Differential expression (DE) analysis

In [None]:
# Marker statistics with cells grouped by the 'geneformer' metadata column.
mergedCoarseType <- SetIdent(object = mergedCoarseType, value = "geneformer")
geneformer_markers <- wilcoxauc(mergedCoarseType)

# Display the markers filtered with auc_min = 0.7 and padj_max = 0.05
# (n = Inf: presumably no cap on rows per group -- confirm in presto docs).
geneformer_markers %>%
    top_markers(auc_min = 0.7, padj_max = 0.05, n = Inf)
# Unfiltered alternative kept for reference:
# top_markers(geneformer_markers)

In [None]:
# Same marker statistics, this time with cells grouped by the 'ClusterTop'
# metadata column; the result is stored as knnMarkers.
mergedCoarseType <- SetIdent(object = mergedCoarseType, value = "ClusterTop")
knnMarkers <- wilcoxauc(mergedCoarseType)

# Display the markers filtered with auc_min = 0.7 and padj_max = 0.05.
knnMarkers %>%
    top_markers(auc_min = 0.7, padj_max = 0.05, n = Inf)

In [None]:
# Compare per-gene marker statistics between geneformer_markers and knnMarkers
# (the latter is labelled 'weightedknn' in the plots). For every group name that
# occurs in both tables, draw a logFC-vs-logFC scatter plot, colour each gene by
# which table gives it AUC > 0.6, and label the genes passing in either table.
# Plots are printed and collected in `plotlist`.
options(repr.plot.width = 10, repr.plot.height = 10)
combnVars <- expand.grid(unique(geneformer_markers$group), unique(knnMarkers$group))
head(combnVars)
plotlist <- list()
cols <- c("auc > 0.6 in both" = "#009E73",  "auc > 0.6 only in weightedknn" = "red", "auc > 0.6 only in geneformer" = "blue", "auc < 0.6 in both" = "black")
j <- 1
# seq_len() instead of 1:nrow(): the loop body is skipped when there are no rows.
for (i in seq_len(nrow(combnVars))) {
    var1 <- as.character(combnVars$Var1[i])
    var2 <- as.character(combnVars$Var2[i])
    # Only same-named groups are plotted, so skip every other pair before
    # doing any join or classification work.
    if (var1 != var2) {
        next
    }

    # One row per feature, with the geneformer and knn statistics side by side.
    temp <- dplyr::left_join(
        geneformer_markers %>%
            filter(group == var1) %>%
            select(feature,
                   group_geneformer = group,
                   logFC_geneformer = logFC,
                   auc_geneformer = auc),
        knnMarkers %>%
            filter(group == var2) %>%
            select(feature,
                   group_knn = group,
                   logFC_knn = logFC,
                   auc_knn = auc),
        by = "feature"
    )

    # Vectorised labelling / colour category (replaces a per-row lapply with
    # scalar `if`, which failed on empty input).
    temp <- temp %>%
        mutate(
            labelTRUE = (auc_knn > 0.6) | (auc_geneformer > 0.6),
            label = if_else(labelTRUE, feature, NA_character_),
            color = as.factor(case_when(
                auc_knn > 0.6 & auc_geneformer > 0.6 ~ "auc > 0.6 in both",
                auc_knn > 0.6 ~ "auc > 0.6 only in weightedknn",
                auc_geneformer > 0.6 ~ "auc > 0.6 only in geneformer",
                TRUE ~ "auc < 0.6 in both"
            ))
        )

    # NOTE(review): the value shown after 'R2:' is the Pearson correlation from
    # cor(), not its square -- confirm which one the figure should report.
    # `shape = 16` is set on geom_point(); passed to ggplot() it had no effect.
    p1 <- ggplot(temp, aes(x = logFC_knn, y = logFC_geneformer, color = color)) +
        geom_point(shape = 16) +
        ylab(glue::glue('logFC geneformer: ', var1)) +
        xlab(glue::glue('logFC weightedknn: ', var2)) +
        ggtitle(glue::glue(var1, '\nR2: ', round(cor(temp$logFC_geneformer, temp$logFC_knn), 2))) +
        # Constant intercepts: a single reference line instead of one per row.
        geom_hline(yintercept = 0) +
        geom_vline(xintercept = 0) +
        ggrepel::geom_label_repel(aes(label = label, color = color)) +
        theme_minimal(base_size = 18) +
        #geom_abline(intercept = 0, slope = 1) +
        scale_color_manual(values = cols) +
        theme(legend.position = 'right') +
        guides(color = guide_legend(override.aes = list(size = 10, shape = 16)))
    print(p1)
    plotlist[[j]] <- p1
    j <- 1 + j
}
length(plotlist)

# Differential expression (DE) analysis per sample

In [None]:
# List the 'coarse_*.rds' files in the working directory and derive one donor
# id per file by stripping the 'coarse_' prefix and the '.rds' extension.
# The dot before 'rds' is escaped so it matches a literal '.', not any character.
coarseTypingOuts <- list.files(pattern = "coarse_.*\\.rds$")
coarseTypingOuts
donors <- gsub(pattern = "coarse_|\\.rds", replacement = "", x = coarseTypingOuts)
donors

In [None]:
# Distinct orig.ident values present in the merged object's metadata.
unique(mergedCoarseType@meta.data$orig.ident)

In [None]:
# Show the current value of `donor`.
# NOTE(review): within the visible notebook `donor` is only assigned by the
# `for (donor in donors)` loops in later cells, so this cell errors when the
# notebook is run top-to-bottom in a fresh session -- confirm whether `donors`
# was intended.
donor

In [None]:
# Per-donor version of the marker comparison: for each donor, subset the merged
# object by orig.ident, compute marker statistics under the 'geneformer' and
# 'ClusterTop' identities, print the filtered marker tables, and draw one
# logFC-vs-logFC scatter plot per group name shared by both results.
# Note: geneformer_markers, knnMarkers, merfish and plotlist are overwritten on
# every iteration, so after the loop they hold the values for the last donor.

# Loop-invariant settings, hoisted out of the donor loop.
options(repr.plot.width = 10, repr.plot.height = 10)
cols <- c("auc > 0.6 in both" = "#009E73",  "auc > 0.6 only in weightedknn" = "red", "auc > 0.6 only in geneformer" = "blue", "auc < 0.6 in both" = "black")

for (donor in donors) {
    merfish <- subset(mergedCoarseType, subset = orig.ident == donor)

    merfish <- SetIdent(merfish, value = 'geneformer')
    geneformer_markers <- wilcoxauc(merfish)
    print(top_markers(geneformer_markers, auc_min = 0.6, padj_max = 0.05, n = Inf))
    #print(top_markers(geneformer_markers))

    merfish <- SetIdent(merfish, value = 'ClusterTop')
    knnMarkers <- wilcoxauc(merfish)
    print(top_markers(knnMarkers, auc_min = 0.6, padj_max = 0.05, n = Inf))

    combnVars <- expand.grid(unique(geneformer_markers$group), unique(knnMarkers$group))
    plotlist <- list()
    j <- 1
    # seq_len() instead of 1:nrow(): the loop body is skipped when there are no rows.
    for (i in seq_len(nrow(combnVars))) {
        var1 <- as.character(combnVars$Var1[i])
        var2 <- as.character(combnVars$Var2[i])
        # Only same-named groups are plotted, so skip every other pair before
        # doing any join or classification work.
        if (var1 != var2) {
            next
        }

        # One row per feature, with the geneformer and knn statistics side by side.
        temp <- dplyr::left_join(
            geneformer_markers %>%
                filter(group == var1) %>%
                select(feature,
                       group_geneformer = group,
                       logFC_geneformer = logFC,
                       auc_geneformer = auc),
            knnMarkers %>%
                filter(group == var2) %>%
                select(feature,
                       group_knn = group,
                       logFC_knn = logFC,
                       auc_knn = auc),
            by = "feature"
        )

        # Vectorised labelling / colour category (replaces a per-row lapply
        # with scalar `if`, which failed on empty input).
        temp <- temp %>%
            mutate(
                labelTRUE = (auc_knn > 0.6) | (auc_geneformer > 0.6),
                label = if_else(labelTRUE, feature, NA_character_),
                color = as.factor(case_when(
                    auc_knn > 0.6 & auc_geneformer > 0.6 ~ "auc > 0.6 in both",
                    auc_knn > 0.6 ~ "auc > 0.6 only in weightedknn",
                    auc_geneformer > 0.6 ~ "auc > 0.6 only in geneformer",
                    TRUE ~ "auc < 0.6 in both"
                ))
            )

        # NOTE(review): the value shown after 'R2:' is the Pearson correlation
        # from cor(), not its square -- confirm which one should be reported.
        # `shape = 16` is set on geom_point(); passed to ggplot() it had no effect.
        p1 <- ggplot(temp, aes(x = logFC_knn, y = logFC_geneformer, color = color)) +
            geom_point(shape = 16) +
            ylab(glue::glue('logFC geneformer: ', var1)) +
            xlab(glue::glue('logFC weightedknn: ', var2)) +
            ggtitle(glue::glue(donor, "\n", var1, '\nR2: ', round(cor(temp$logFC_geneformer, temp$logFC_knn), 2))) +
            # Constant intercepts: a single reference line instead of one per row.
            geom_hline(yintercept = 0) +
            geom_vline(xintercept = 0) +
            ggrepel::geom_label_repel(aes(label = label, color = color)) +
            theme_minimal(base_size = 18) +
            #geom_abline(intercept = 0, slope = 1) +
            scale_color_manual(values = cols) +
            theme(legend.position = 'right') +
            guides(color = guide_legend(override.aes = list(size = 10, shape = 16)))
        print(p1)
        plotlist[[j]] <- p1
        j <- 1 + j
    }
}

In [None]:
# Column names available in the merged object's metadata.
colnames(mergedCoarseType@meta.data)

In [None]:
# Export the donor id, original cell id and both annotation columns for every
# cell to cell_annotations.csv.
mergedCoarseType@meta.data %>%
    select(orig.ident, orig.cell.id.x, geneformer, ClusterTop) %>%
    as.data.frame() %>%
    write.csv(file = "cell_annotations.csv")

In [None]:
# For each donor, write the filtered marker tables (auc_min = 0.6,
# padj_max = 0.05) for the 'geneformer' and 'ClusterTop' identities to
# '<donor>_geneformer_markers.csv' and '<donor>_weighted_knn_markers.csv'.
# Note: merfish, geneformer_markers and knnMarkers are overwritten on every
# iteration.
for (donor in donors) {
    merfish <- subset(mergedCoarseType, subset = orig.ident == donor)

    merfish <- SetIdent(merfish, value = 'geneformer')
    geneformer_markers <- wilcoxauc(merfish)
    # paste0(): plain paste() inserted a space between the donor id and the
    # suffix, producing file names such as "D1 _geneformer_markers.csv".
    filename <- paste0(donor, "_geneformer_markers.csv")
    write.csv(top_markers(geneformer_markers, auc_min = 0.6, padj_max = 0.05, n = Inf), file = filename)

    merfish <- SetIdent(merfish, value = 'ClusterTop')
    filename <- paste0(donor, "_weighted_knn_markers.csv")
    knnMarkers <- wilcoxauc(merfish)
    write.csv(top_markers(knnMarkers, auc_min = 0.6, padj_max = 0.05, n = Inf), file = filename)
}